All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhongya Yan <yan2228598786@gmail.com>
To: kuba@kernel.org
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	edumazet@google.com, rostedt@goodmis.org, mingo@redhat.com,
	davem@davemloft.net, yoshfuji@linux-ipv6.org, dsahern@kernel.org,
	hengqi.chen@gmail.com, yhs@fb.com,
	Zhongya Yan <yan2228598786@gmail.com>
Subject: [PATCH] net: tcp_drop adds `reason` parameter for tracing v2
Date: Wed, 25 Aug 2021 08:40:43 -0700	[thread overview]
Message-ID: <20210825154043.247764-1-yan2228598786@gmail.com> (raw)

In this version, several code issues are fixed and reworked. The `reason` argument to `tcp_drop` was changed from a plain enumeration value to a mask that combines the call-site line number with the enumeration, and the trace output now decodes both parts.
`__LINE__` is shifted left by 6 bits, and the low 6 bits hold an enum code identifying the function that calls `tcp_drop`; 6 bits are enough for the number of call sites. This gives a more granular, unique identification of the reason for each `tcp_drop` call, without conflicts and with essentially no risk of overflow.
Example.
```
enum {
TCP_OFO_QUEUE = 1
}
reason = __LINE__ << 6
reason |= TCP_OFO_QUEUE
```
Suggestions from Jakub Kicinski and Eric Dumazet — much appreciated.

Changed how the enumeration is expressed, and how the reason is decoded and printed in the trace event definition.
Suggestion from Steven Rostedt. Thanks.

Signed-off-by: Zhongya Yan <yan2228598786@gmail.com>
---
 include/net/tcp.h          |  18 +++---
 include/trace/events/tcp.h |  39 +++++++++++--
 net/ipv4/tcp_input.c       | 114 ++++++++++++++++++++++---------------
 3 files changed, 112 insertions(+), 59 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dd8cd8d6f2f1..75614a252675 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -256,15 +256,19 @@ extern unsigned long tcp_memory_pressure;
 
 enum tcp_drop_reason {
 	TCP_OFO_QUEUE = 1,
-	TCP_DATA_QUEUE_OFO = 2,
-	TCP_DATA_QUEUE = 3,
-	TCP_PRUNE_OFO_QUEUE = 4,
-	TCP_VALIDATE_INCOMING = 5,
-	TCP_RCV_ESTABLISHED = 6,
-	TCP_RCV_SYNSENT_STATE_PROCESS = 7,
-	TCP_RCV_STATE_PROCESS = 8
+	TCP_DATA_QUEUE_OFO,
+	TCP_DATA_QUEUE,
+	TCP_PRUNE_OFO_QUEUE,
+	TCP_VALIDATE_INCOMING,
+	TCP_RCV_ESTABLISHED,
+	TCP_RCV_SYNSENT_STATE_PROCESS,
+	TCP_RCV_STATE_PROCESS
 };
 
+#define TCP_DROP_MASK(line, code)	((line << 6) | code) /* reason for masking */
+#define TCP_DROP_LINE(mask)		(mask >> 6)	/* Cause decode to get unique identifier */
+#define TCP_DROP_CODE(mask)		(mask&0x3F) /* Cause decode get function code */
+
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index a0d3d31eb591..699539702ea9 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -371,8 +371,26 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
 	TP_ARGS(skb)
 );
 
+// from author @{Steven Rostedt}
+#define TCP_DROP_REASON							\
+	REASON_STRING(TCP_OFO_QUEUE,		ofo_queue)			\
+	REASON_STRING(TCP_DATA_QUEUE_OFO,		data_queue_ofo)			\
+	REASON_STRING(TCP_DATA_QUEUE,		data_queue)			\
+	REASON_STRING(TCP_PRUNE_OFO_QUEUE,		prune_ofo_queue)		\
+	REASON_STRING(TCP_VALIDATE_INCOMING,	validate_incoming)		\
+	REASON_STRING(TCP_RCV_ESTABLISHED,		rcv_established)		\
+	REASON_STRING(TCP_RCV_SYNSENT_STATE_PROCESS, rcv_synsent_state_process)	\
+	REASON_STRINGe(TCP_RCV_STATE_PROCESS,	rcv_state_proces)
+
+#undef REASON_STRING
+#undef REASON_STRINGe
+
+#define REASON_STRING(code, name) {code , #name},
+#define REASON_STRINGe(code, name) {code, #name}
+
+
 TRACE_EVENT(tcp_drop,
-		TP_PROTO(struct sock *sk, struct sk_buff *skb, enum tcp_drop_reason reason),
+		TP_PROTO(struct sock *sk, struct sk_buff *skb, __u32 reason),
 
 		TP_ARGS(sk, skb, reason),
 
@@ -392,6 +410,8 @@ TRACE_EVENT(tcp_drop,
 			__field(__u32, rcv_wnd)
 			__field(__u64, sock_cookie)
 			__field(__u32, reason)
+			__field(__u32, reason_code)
+			__field(__u32, reason_line)
 			),
 
 		TP_fast_assign(
@@ -415,16 +435,23 @@ TRACE_EVENT(tcp_drop,
 				__entry->snd_wnd = tp->snd_wnd;
 				__entry->rcv_wnd = tp->rcv_wnd;
 				__entry->ssthresh = tcp_current_ssthresh(sk);
-		__entry->srtt = tp->srtt_us >> 3;
-		__entry->sock_cookie = sock_gen_cookie(sk);
-		__entry->reason = reason;
+				__entry->srtt = tp->srtt_us >> 3;
+				__entry->sock_cookie = sock_gen_cookie(sk);
+				__entry->reason = reason;
+				__entry->reason_code = TCP_DROP_CODE(reason);
+				__entry->reason_line = TCP_DROP_LINE(reason);
 		),
 
-		TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx reason=%d",
+		TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x \
+				snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u \
+				sock_cookie=%llx reason=%d reason_type=%s reason_line=%d",
 				__entry->saddr, __entry->daddr, __entry->mark,
 				__entry->data_len, __entry->snd_nxt, __entry->snd_una,
 				__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-				__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie, __entry->reason)
+				__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
+				__entry->reason,
+				__print_symbolic(__entry->reason_code, TCP_DROP_REASON),
+				__entry->reason_line)
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 83e31661876b..7dfcc4253c35 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4643,20 +4643,14 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
 	return res;
 }
 
-static void __tcp_drop(struct sock *sk,
-		   struct sk_buff *skb)
-{
-	sk_drops_add(sk, skb);
-	__kfree_skb(skb);
-}
 
-/* tcp_drop whit reason,for epbf trace
+/* tcp_drop with reason
  */
 static void tcp_drop(struct sock *sk, struct sk_buff *skb,
-		 enum tcp_drop_reason reason)
+		 __u32 reason)
 {
 	trace_tcp_drop(sk, skb, reason);
-	__tcp_drop(sk, skb);
+	__kfree_skb(skb);
 }
 
 /* This one checks to see if we can put data from the
@@ -5621,7 +5615,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  LINUX_MIB_TCPACKSKIPPEDPAWS,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
-			goto discard;
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+			goto end;
 		}
 		/* Reset is accepted even if it did not pass PAWS. */
 	}
@@ -5644,7 +5639,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		} else if (tcp_reset_check(sk, skb)) {
 			tcp_reset(sk, skb);
 		}
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		goto end;
 	}
 
 	/* Step 2: check RST bit */
@@ -5689,7 +5685,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				tcp_fastopen_active_disable(sk);
 			tcp_send_challenge_ack(sk, skb);
 		}
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		goto end;
 	}
 
 	/* step 3: check security and precedence [ignored] */
@@ -5703,15 +5700,15 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
 		tcp_send_challenge_ack(sk, skb);
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		goto end;
 	}
 
 	bpf_skops_parse_hdr(sk, skb);
 
 	return true;
 
-discard:
-	tcp_drop(sk, skb, TCP_VALIDATE_INCOMING);
+end:
 	return false;
 }
 
@@ -5829,7 +5826,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				goto discard;
+				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+				goto end;
 			}
 		} else {
 			int eaten = 0;
@@ -5883,8 +5881,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
-	if (!th->ack && !th->rst && !th->syn)
-		goto discard;
+	if (!th->ack && !th->rst && !th->syn) {
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+		goto end;
+	}
 
 	/*
 	 *	Standard slow path.
@@ -5894,8 +5894,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 		return;
 
 step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
-		goto discard;
+	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) {
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+		goto end;
+	}
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
 
@@ -5913,9 +5915,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-
-discard:
-	tcp_drop(sk, skb, TCP_RCV_ESTABLISHED);
+	tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+end:
+	return;
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
@@ -6115,7 +6117,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (th->rst) {
 			tcp_reset(sk, skb);
-			goto discard;
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+			goto end;
 		}
 
 		/* rfc793:
@@ -6204,9 +6207,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
 
-discard:
-			tcp_drop(sk, skb, TCP_RCV_SYNSENT_STATE_PROCESS);
+end:
 			return 0;
 		} else {
 			tcp_send_ack(sk);
@@ -6279,7 +6282,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		return -1;
 #else
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+		goto end;
 #endif
 	}
 	/* "fifth, if neither of the SYN or RST bits is set then
@@ -6289,7 +6293,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 discard_and_undo:
 	tcp_clear_options(&tp->rx_opt);
 	tp->rx_opt.mss_clamp = saved_clamp;
-	goto discard;
+	tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+	goto end;
 
 reset_and_undo:
 	tcp_clear_options(&tp->rx_opt);
@@ -6347,18 +6352,23 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	switch (sk->sk_state) {
 	case TCP_CLOSE:
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		goto end;
 
 	case TCP_LISTEN:
 		if (th->ack)
 			return 1;
 
-		if (th->rst)
-			goto discard;
+		if (th->rst) {
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			goto end;
+		}
 
 		if (th->syn) {
-			if (th->fin)
-				goto discard;
+			if (th->fin) {
+				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+				goto end;
+			}
 			/* It is possible that we process SYN packets from backlog,
 			 * so we need to make sure to disable BH and RCU right there.
 			 */
@@ -6373,7 +6383,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			consume_skb(skb);
 			return 0;
 		}
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		goto end;
 
 	case TCP_SYN_SENT:
 		tp->rx_opt.saw_tstamp = 0;
@@ -6399,12 +6410,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
-			goto discard;
+		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			goto end;
+		}
 	}
 
-	if (!th->ack && !th->rst && !th->syn)
-		goto discard;
+	if (!th->ack && !th->rst && !th->syn) {
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		goto end;
+	}
 
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
@@ -6418,7 +6433,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_SYN_RECV)
 			return 1;	/* send one RST */
 		tcp_send_challenge_ack(sk, skb);
-		goto discard;
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		goto end;
 	}
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
@@ -6511,7 +6527,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			inet_csk_reset_keepalive_timer(sk, tmo);
 		} else {
 			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-			goto discard;
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			goto end;
 		}
 		break;
 	}
@@ -6519,7 +6536,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_CLOSING:
 		if (tp->snd_una == tp->write_seq) {
 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			goto discard;
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			goto end;
 		}
 		break;
 
@@ -6527,7 +6545,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->snd_una == tp->write_seq) {
 			tcp_update_metrics(sk);
 			tcp_done(sk);
-			goto discard;
+			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			goto end;
 		}
 		break;
 	}
@@ -6544,8 +6563,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			/* If a subflow has been reset, the packet should not
 			 * continue to be processed, drop the packet.
 			 */
-			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
-				goto discard;
+			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
+				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+				goto end;
+			}
 			break;
 		}
 		fallthrough;
@@ -6577,9 +6598,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (!queued) {
-discard:
-		tcp_drop(sk, skb, TCP_RCV_STATE_PROCESS);
+		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
 	}
+
+end:
 	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_state_process);
-- 
2.25.1


             reply	other threads:[~2021-08-25 15:41 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-25 15:40 Zhongya Yan [this message]
2021-08-25 15:47 ` [PATCH] net: tcp_drop adds `reason` parameter for tracing v2 Eric Dumazet
2021-08-25 16:04   ` Jakub Kicinski
2021-08-25 16:20     ` Eric Dumazet
2021-08-25 17:06       ` Jakub Kicinski
2021-08-26  3:19   ` Steven Rostedt
2021-08-26  5:13     ` Brendan Gregg
2021-09-01 14:36       ` Steven Rostedt
2021-09-01 15:20         ` Eric Dumazet
2021-09-01 17:44           ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210825154043.247764-1-yan2228598786@gmail.com \
    --to=yan2228598786@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=hengqi.chen@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=yhs@fb.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.