[Cluster-devel] [PATCH dlm-next 1/2] fs: dlm: don't call kernel_getpeername() in error_report()
Alexander Aring
aahringo at redhat.com
Fri Nov 12 22:02:39 UTC 2021
In some cases kernel_getpeername() will held the socket lock which is
already held when the socket layer calls error_report() callback. Since
commit 9dfc685e0262 ("inet: remove races in inet{6}_getname()") this
problem becomes more likely because the socket lock will be held always.
You will see something like:
bob9-u5 login: [ 562.316860] BUG: spinlock recursion on CPU#7, swapper/7/0
[ 562.318562] lock: 0xffff8f2284720088, .magic: dead4ead, .owner: swapper/7/0, .owner_cpu: 7
[ 562.319522] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.15.0+ #135
[ 562.320346] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014
[ 562.321277] Call Trace:
[ 562.321529] <IRQ>
[ 562.321734] dump_stack_lvl+0x33/0x42
[ 562.322282] do_raw_spin_lock+0x8b/0xc0
[ 562.322674] lock_sock_nested+0x1e/0x50
[ 562.323057] inet_getname+0x39/0x110
[ 562.323425] ? sock_def_readable+0x80/0x80
[ 562.323838] lowcomms_error_report+0x63/0x260 [dlm]
[ 562.324338] ? wait_for_completion_interruptible_timeout+0xd2/0x120
[ 562.324949] ? lock_timer_base+0x67/0x80
[ 562.325330] ? do_raw_spin_unlock+0x49/0xc0
[ 562.325735] ? _raw_spin_unlock_irqrestore+0x1e/0x40
[ 562.326218] ? del_timer+0x54/0x80
[ 562.326549] sk_error_report+0x12/0x70
[ 562.326919] tcp_validate_incoming+0x3c8/0x530
[ 562.327347] ? kvm_clock_read+0x14/0x30
[ 562.327718] ? ktime_get+0x3b/0xa0
[ 562.328055] tcp_rcv_established+0x121/0x660
[ 562.328466] tcp_v4_do_rcv+0x132/0x260
[ 562.328835] tcp_v4_rcv+0xcea/0xe20
[ 562.329173] ip_protocol_deliver_rcu+0x35/0x1f0
[ 562.329615] ip_local_deliver_finish+0x54/0x60
[ 562.330050] ip_local_deliver+0xf7/0x110
[ 562.330431] ? inet_rtm_getroute+0x211/0x840
[ 562.330848] ? ip_protocol_deliver_rcu+0x1f0/0x1f0
[ 562.331310] ip_rcv+0xe1/0xf0
[ 562.331603] ? ip_local_deliver+0x110/0x110
[ 562.332011] __netif_receive_skb_core+0x46a/0x1040
[ 562.332476] ? inet_gro_receive+0x263/0x2e0
[ 562.332885] __netif_receive_skb_list_core+0x13b/0x2c0
[ 562.333383] netif_receive_skb_list_internal+0x1c8/0x2f0
[ 562.333896] ? update_load_avg+0x7e/0x5e0
[ 562.334285] gro_normal_list.part.149+0x19/0x40
[ 562.334722] napi_complete_done+0x67/0x160
[ 562.335134] virtnet_poll+0x2ad/0x408 [virtio_net]
[ 562.335644] __napi_poll+0x28/0x140
[ 562.336012] net_rx_action+0x23d/0x300
[ 562.336414] __do_softirq+0xf2/0x2ea
[ 562.336803] irq_exit_rcu+0xc1/0xf0
[ 562.337173] common_interrupt+0xb9/0xd0
It is and was always forbidden to call kernel_getpeername() in context
of error_report(). To get rid of the problem we save the peer address in
the connection struct and access it in error_report() without using
kernel_getpeername() in this context.
Fixes: 1a31833d085a ("DLM: Replace nodeid_to_addr with kernel_getpeername")
Reported-by: Bob Peterson <rpeterso at redhat.com>
Signed-off-by: Alexander Aring <aahringo at redhat.com>
---
fs/dlm/lowcomms.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2f070514b3ee..e319309066b3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -68,6 +68,7 @@
struct connection {
struct socket *sock; /* NULL if not connected */
+ struct sockaddr_storage addr;
uint32_t nodeid; /* So we know who we are in the list */
struct mutex sock_mutex;
unsigned long flags;
@@ -594,7 +595,6 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
- struct sockaddr_storage saddr;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
@@ -603,14 +603,8 @@ static void lowcomms_error_report(struct sock *sk)
goto out;
orig_report = listen_sock.sk_error_report;
- if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
- printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d, port %d, "
- "sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, dlm_config.ci_tcp_port,
- sk->sk_err, sk->sk_err_soft);
- } else if (saddr.ss_family == AF_INET) {
- struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+ if (con->addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin4 = (struct sockaddr_in *)&con->addr;
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d at %pI4, port %d, "
@@ -619,7 +613,7 @@ static void lowcomms_error_report(struct sock *sk)
dlm_config.ci_tcp_port, sk->sk_err,
sk->sk_err_soft);
} else {
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&con->addr;
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d at %u.%u.%u.%u, "
@@ -1072,6 +1066,7 @@ static int accept_from_sock(struct listen_connection *con)
close_connection(othercon, false, true, false);
}
+ memcpy(&othercon->addr, &peeraddr, sizeof(othercon->addr));
mutex_lock(&othercon->sock_mutex);
add_sock(newsock, othercon);
addcon = othercon;
@@ -1081,6 +1076,7 @@ static int accept_from_sock(struct listen_connection *con)
/* accept copies the sk after we've saved the callbacks, so we
don't want to save them a second time or comm errors will
result in calling sk_error_report recursively. */
+ memcpy(&newcon->addr, &peeraddr, sizeof(newcon->addr));
add_sock(newsock, newcon);
addcon = newcon;
}
@@ -1545,6 +1541,7 @@ static void dlm_connect(struct connection *con)
return;
}
+ memcpy(&con->addr, &addr, sizeof(con->addr));
/* Create a socket to communicate with */
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_STREAM, dlm_proto_ops->proto, &sock);
--
2.27.0
More information about the Cluster-devel
mailing list