From 05fe7848aa784da5bc5a5f8b16c6d39b65796042 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 21:51:19 -0700 Subject: [PATCH] tcp: fix a potential deadlock in tcp_get_info() Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [] lock_acquire+0xbb/0x270 [ 523.802565] [] _raw_spin_lock+0x38/0x50 [ 523.808628] [] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [] tcp_check_req+0x3c1/0x500 [ 523.828199] [] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [] ip_local_deliver+0xaa/0xc0 [ 523.853440] [] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Change-Id: I50c833b77f41111a6fa3a57834c6311f95573754 --- include/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 10 ++++++---- net/ipv4/tcp_input.c | 4 ++++ net/ipv4/tcp_ipv4.c | 1 + 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 39aea2bd92d5..3781c6678ed3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -339,6 +339,8 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ + u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 98e6a290500d..2179f95e7751 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2503,6 +2503,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + unsigned int start; memset(info, 0, sizeof(*info)); @@ -2566,12 +2567,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? sk->sk_max_pacing_rate : ~0ULL; - spin_lock_bh(&sk->sk_lock.slock); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - spin_unlock_bh(&sk->sk_lock.slock); if (sk->sk_socket) { struct file *filep = sk->sk_socket->file; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4f482fe43ede..e150e395d28c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3539,7 +3539,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { u32 delta = ack - tp->snd_una; + u64_stats_update_begin(&tp->syncp); tp->bytes_acked += delta; + u64_stats_update_end(&tp->syncp); tp->snd_una = ack; } @@ -3548,7 +3550,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { u32 delta = seq - tp->rcv_nxt; + u64_stats_update_begin(&tp->syncp); tp->bytes_received += delta; + u64_stats_update_end(&tp->syncp); tp->rcv_nxt = seq; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5ce5d56f28cd..19ea9b67c480 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1908,6 +1908,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; + u64_stats_init(&tp->syncp); tp->reordering = sysctl_tcp_reordering; icsk->icsk_ca_ops = &tcp_init_congestion_ops;