* [PATCH] net: tcp_drop adds `reason` and SNMP parameters for tracing v3
@ 2021-09-02  9:33 Zhongya Yan
From: Zhongya Yan @ 2021-09-02  9:33 UTC (permalink / raw)
  To: edumazet
  Cc: netdev, linux-kernel, kuba, rostedt, mingo, davem, yoshfuji,
	dsahern, hengqi.chen, yhs, brendan.d.gregg, 2228598786,
	Zhongya Yan

I used the suggestion from `Brendan Gregg`. In addition to the
`reason` parameter there is now a `field` parameter pointing to the
corresponding `SNMP` counter, to distinguish the cause of each
`tcp_drop`. I know what I have submitted is not yet accurate, so I am
posting the current patch to get comments and criticism from everyone,
so that I can submit better code and solutions, and of course become
more familiar with the `linux` kernel network code.
Thank you, everyone!
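
For illustration, once the patch is applied the event can be consumed
through tracefs. This is a minimal usage sketch, not verified output:
the tracefs mount point and the abbreviated trace line are assumptions
on my part.

    # enable the tracepoint defined by TRACE_EVENT(tcp_drop) below
    # (tracefs assumed mounted at /sys/kernel/tracing)
    echo 1 > /sys/kernel/tracing/events/tcp/tcp_drop/enable
    # each drop then logs one line carrying the SNMP field index and
    # the reason string, e.g. "... field=<n> reason=Tcp rmem failed"
    cat /sys/kernel/tracing/trace_pipe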

Signed-off-by: Zhongya Yan <yan2228598786@gmail.com>
---
 include/trace/events/tcp.h |  39 +++---------
 net/ipv4/tcp_input.c       | 126 ++++++++++++++-----------------------
 2 files changed, 57 insertions(+), 108 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 699539702ea9..80892660458e 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -371,28 +371,10 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
 	TP_ARGS(skb)
 );
 
-// from author @{Steven Rostedt}
-#define TCP_DROP_REASON							\
-	REASON_STRING(TCP_OFO_QUEUE,		ofo_queue)			\
-	REASON_STRING(TCP_DATA_QUEUE_OFO,		data_queue_ofo)			\
-	REASON_STRING(TCP_DATA_QUEUE,		data_queue)			\
-	REASON_STRING(TCP_PRUNE_OFO_QUEUE,		prune_ofo_queue)		\
-	REASON_STRING(TCP_VALIDATE_INCOMING,	validate_incoming)		\
-	REASON_STRING(TCP_RCV_ESTABLISHED,		rcv_established)		\
-	REASON_STRING(TCP_RCV_SYNSENT_STATE_PROCESS, rcv_synsent_state_process)	\
-	REASON_STRINGe(TCP_RCV_STATE_PROCESS,	rcv_state_proces)
-
-#undef REASON_STRING
-#undef REASON_STRINGe
-
-#define REASON_STRING(code, name) {code , #name},
-#define REASON_STRINGe(code, name) {code, #name}
-
-
 TRACE_EVENT(tcp_drop,
-		TP_PROTO(struct sock *sk, struct sk_buff *skb, __u32 reason),
+		TP_PROTO(struct sock *sk, struct sk_buff *skb, int field, const char *reason),
 
-		TP_ARGS(sk, skb, reason),
+		TP_ARGS(sk, skb, field, reason),
 
 		TP_STRUCT__entry(
 			__array(__u8, saddr, sizeof(struct sockaddr_in6))
@@ -409,9 +391,8 @@ TRACE_EVENT(tcp_drop,
 			__field(__u32, srtt)
 			__field(__u32, rcv_wnd)
 			__field(__u64, sock_cookie)
-			__field(__u32, reason)
-			__field(__u32, reason_code)
-			__field(__u32, reason_line)
+			__field(int, field)
+			__string(reason, reason)
 			),
 
 		TP_fast_assign(
@@ -437,21 +418,19 @@ TRACE_EVENT(tcp_drop,
 				__entry->ssthresh = tcp_current_ssthresh(sk);
 				__entry->srtt = tp->srtt_us >> 3;
 				__entry->sock_cookie = sock_gen_cookie(sk);
-				__entry->reason = reason;
-				__entry->reason_code = TCP_DROP_CODE(reason);
-				__entry->reason_line = TCP_DROP_LINE(reason);
+				__entry->field = field;
+
+				__assign_str(reason, reason);
 		),
 
 		TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x \
 				snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u \
-				sock_cookie=%llx reason=%d reason_type=%s reason_line=%d",
+				sock_cookie=%llx field=%d reason=%s",
 				__entry->saddr, __entry->daddr, __entry->mark,
 				__entry->data_len, __entry->snd_nxt, __entry->snd_una,
 				__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
 				__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
-				__entry->reason,
-				__print_symbolic(__entry->reason_code, TCP_DROP_REASON),
-				__entry->reason_line)
+				field, __get_str(reason))
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b2bc49f1f0de..bd33fd466e1e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,7 +100,6 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
 #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
 #define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */
-#define FLAG_DSACK_TLP		0x20000 /* DSACK for tail loss probe */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -455,12 +454,11 @@ static void tcp_sndbuf_expand(struct sock *sk)
  */
 
 /* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
-			     unsigned int skbtruesize)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
-	int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
 	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
@@ -473,27 +471,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
 	return 0;
 }
 
-/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
- * can play nice with us, as sk_buff and skb->head might be either
- * freed or shared with up to MAX_SKB_FRAGS segments.
- * Only give a boost to drivers using page frag(s) to hold the frame(s),
- * and if no payload was pulled in skb->head before reaching us.
- */
-static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
-{
-	u32 truesize = skb->truesize;
-
-	if (adjust && !skb_headlen(skb)) {
-		truesize -= SKB_TRUESIZE(skb_end_offset(skb));
-		/* paranoid check, some drivers might be buggy */
-		if (unlikely((int)truesize < (int)skb->len))
-			truesize = skb->truesize;
-	}
-	return truesize;
-}
-
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
-			    bool adjust)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int room;
@@ -502,16 +480,15 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
 
 	/* Check #1 */
 	if (room > 0 && !tcp_under_memory_pressure(sk)) {
-		unsigned int truesize = truesize_adjust(adjust, skb);
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
 		 * will fit to rcvbuf in future.
 		 */
-		if (tcp_win_from_space(sk, truesize) <= skb->len)
+		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
 			incr = 2 * tp->advmss;
 		else
-			incr = __tcp_grow_window(sk, skb, truesize);
+			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
 			incr = max_t(int, incr, 2 * skb->len);
@@ -805,7 +782,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 	tcp_ecn_check_ce(sk, skb);
 
 	if (skb->len >= 128)
-		tcp_grow_window(sk, skb, true);
+		tcp_grow_window(sk, skb);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -992,8 +969,6 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
 		return 0;
 	if (seq_len > tp->mss_cache)
 		dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
-	else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
-		state->flag |= FLAG_DSACK_TLP;
 
 	tp->dsack_dups += dup_segs;
 	/* Skip the DSACK if dup segs weren't retransmitted by sender */
@@ -1001,14 +976,7 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
 		return 0;
 
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
-	/* We increase the RACK ordering window in rounds where we receive
-	 * DSACKs that may have been due to reordering causing RACK to trigger
-	 * a spurious fast recovery. Thus RACK ignores DSACKs that happen
-	 * without having seen reordering, or that match TLP probes (TLP
-	 * is timer-driven, not triggered by RACK).
-	 */
-	if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
-		tp->rack.dsack_seen = 1;
+	tp->rack.dsack_seen = 1;
 
 	state->flag |= FLAG_DSACKING_ACK;
 	/* A spurious retransmission is delivered */
@@ -3660,7 +3628,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 	if (!tp->tlp_retrans) {
 		/* TLP of new data has been acknowledged */
 		tp->tlp_high_seq = 0;
-	} else if (flag & FLAG_DSACK_TLP) {
+	} else if (flag & FLAG_DSACKING_ACK) {
 		/* This DSACK means original and TLP probe arrived; no loss */
 		tp->tlp_high_seq = 0;
 	} else if (after(ack, tp->tlp_high_seq)) {
@@ -4679,9 +4647,10 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
 /* tcp_drop with reason
  */
 static void tcp_drop(struct sock *sk, struct sk_buff *skb,
-		 __u32 reason)
+		 int field, char *reason)
 {
-	trace_tcp_drop(sk, skb, reason);
+	trace_tcp_drop(sk, skb, field, reason);
+	sk_drops_add(sk, skb);
 	__kfree_skb(skb);
 }
 
@@ -4712,7 +4681,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			tcp_drop(sk, skb, TCP_OFO_QUEUE);
+			tcp_drop(sk, skb, LINUX_MIB_TCPOFOQUEUE, "Tcp queue error");
 			continue;
 		}
 
@@ -4768,7 +4737,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
 		sk->sk_data_ready(sk);
-		tcp_drop(sk, skb, TCP_DATA_QUEUE_OFO);
+		tcp_drop(sk, skb, LINUX_MIB_TCPOFODROP, "Tcp rmem failed");
 		return;
 	}
 
@@ -4805,7 +4774,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		 * and trigger fast retransmit.
 		 */
 		if (tcp_is_sack(tp))
-			tcp_grow_window(sk, skb, true);
+			tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
 		skb = NULL;
 		goto add_sack;
@@ -4831,7 +4800,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				tcp_drop(sk, skb, TCP_DATA_QUEUE_OFO);
+				tcp_drop(sk, skb, LINUX_MIB_TCPOFOMERGE, "Tcp bits are present");
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4850,7 +4819,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				tcp_drop(sk, skb1, TCP_DATA_QUEUE_OFO);
+				tcp_drop(sk, skb1, LINUX_MIB_TCPOFOMERGE, "Tcp replace(skb.seq eq skb1.seq)");
 				goto merge_right;
 			}
 		} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4878,7 +4847,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 				 TCP_SKB_CB(skb1)->end_seq);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-		tcp_drop(sk, skb1, TCP_DATA_QUEUE_OFO);
+		tcp_drop(sk, skb1, LINUX_MIB_TCPOFOMERGE, "Tcp useless other segments");
 	}
 	/* If there is no skb after us, we are the last_skb ! */
 	if (!skb1)
@@ -4893,7 +4862,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		 * and trigger fast retransmit.
 		 */
 		if (tcp_is_sack(tp))
-			tcp_grow_window(sk, skb, false);
+			tcp_grow_window(sk, skb);
 		skb_condense(skb);
 		skb_set_owner_r(skb, sk);
 	}
@@ -5014,7 +4983,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 			sk->sk_data_ready(sk);
-			goto drop;
+			tcp_drop(sk, skb, LINUX_MIB_TCPRCVQDROP, "Tcp rmem failed");
+			goto end;
 		}
 
 		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5054,8 +5024,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 out_of_window:
 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
-drop:
-		tcp_drop(sk, skb, TCP_DATA_QUEUE);
+		tcp_drop(sk, skb, LINUX_MIB_TCPZEROWINDOWDROP, "Tcp out of order or zero window");
+end:
 		return;
 	}
 
@@ -5312,7 +5282,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
 		goal -= rb_to_skb(node)->truesize;
-		tcp_drop(sk, rb_to_skb(node), TCP_PRUNE_OFO_QUEUE);
+		tcp_drop(sk, rb_to_skb(node), LINUX_MIB_OFOPRUNED, "Tcp drop out-of-order queue");
 		if (!prev || goal <= 0) {
 			sk_mem_reclaim(sk);
 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5419,7 +5389,7 @@ static void tcp_new_space(struct sock *sk)
 		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}
 
-	INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
+	sk->sk_write_space(sk);
 }
 
 static void tcp_check_space(struct sock *sk)
@@ -5647,7 +5617,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  LINUX_MIB_TCPACKSKIPPEDPAWS,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+			tcp_drop(sk, skb, LINUX_MIB_PAWSESTABREJECTED, "Tcp PAWS seq first");
 			goto end;
 		}
 		/* Reset is accepted even if it did not pass PAWS. */
@@ -5671,7 +5641,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		} else if (tcp_reset_check(sk, skb)) {
 			tcp_reset(sk, skb);
 		}
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		tcp_drop(sk, skb, LINUX_MIB_PAWSESTABREJECTED, "Tcp check sequence number");
 		goto end;
 	}
 
@@ -5717,7 +5687,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				tcp_fastopen_active_disable(sk);
 			tcp_send_challenge_ack(sk, skb);
 		}
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		tcp_drop(sk, skb, LINUX_MIB_TCPCHALLENGEACK, "Tcp check RST bit ");
 		goto end;
 	}
 
@@ -5732,7 +5702,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
 		tcp_send_challenge_ack(sk, skb);
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_VALIDATE_INCOMING));
+		tcp_drop(sk, skb, LINUX_MIB_TCPSYNCHALLENGE, "Tcp check for a SYN");
 		goto end;
 	}
 
@@ -5858,7 +5828,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+				tcp_drop(sk, skb, TCP_MIB_INERRS, "Tcp header too small");
 				goto end;
 			}
 		} else {
@@ -5914,7 +5884,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 		goto csum_error;
 
 	if (!th->ack && !th->rst && !th->syn) {
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+		tcp_drop(sk, skb, LINUX_MIB_TCPSLOWSTARTRETRANS, "Tcp state not in ack|rst|syn");
 		goto end;
 	}
 
@@ -5927,7 +5897,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 
 step5:
 	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) {
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+		tcp_drop(sk, skb, LINUX_MIB_TCPSACKDISCARD, "Tcp ack have not sent yet");
 		goto end;
 	}
 
@@ -5947,7 +5917,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-	tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_ESTABLISHED));
+	tcp_drop(sk, skb, TCP_MIB_CSUMERRORS, "Tcp csum error");
 end:
 	return;
 }
@@ -6149,7 +6119,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (th->rst) {
 			tcp_reset(sk, skb);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_NUM, "Tcp reset");
 			goto end;
 		}
 
@@ -6239,7 +6209,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_TCPFASTOPENACTIVE, "Tcp fast open ack error");
 
 end:
 			return 0;
@@ -6314,7 +6284,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		return -1;
 #else
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_SYNCOOKIESRECV, "Tcp syn received error");
 		goto end;
 #endif
 	}
@@ -6325,7 +6295,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 discard_and_undo:
 	tcp_clear_options(&tp->rx_opt);
 	tp->rx_opt.mss_clamp = saved_clamp;
-	tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_SYNSENT_STATE_PROCESS));
+	tcp_drop(sk, skb, LINUX_MIB_TCPSACKDISCARD, "Tcp not neither of SYN or RST");
 	goto end;
 
 reset_and_undo:
@@ -6384,7 +6354,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	switch (sk->sk_state) {
 	case TCP_CLOSE:
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_TCPABORTONCLOSE, "Tcp close");
 		goto end;
 
 	case TCP_LISTEN:
@@ -6392,13 +6362,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			return 1;
 
 		if (th->rst) {
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "Tcp rst");
 			goto end;
 		}
 
 		if (th->syn) {
 			if (th->fin) {
-				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+				tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "Tcp fin");
 				goto end;
 			}
 			/* It is possible that we process SYN packets from backlog,
@@ -6415,7 +6385,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			consume_skb(skb);
 			return 0;
 		}
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "Tcp syn");
 		goto end;
 
 	case TCP_SYN_SENT:
@@ -6443,13 +6413,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		    sk->sk_state != TCP_FIN_WAIT1);
 
 		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "Tcp check req error");
 			goto end;
 		}
 	}
 
 	if (!th->ack && !th->rst && !th->syn) {
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "Tcp not ack|rst|syn");
 		goto end;
 	}
 
@@ -6465,7 +6435,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_SYN_RECV)
 			return 1;	/* send one RST */
 		tcp_send_challenge_ack(sk, skb);
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_TCPCHALLENGEACK, "Tcp check ack failed");
 		goto end;
 	}
 	switch (sk->sk_state) {
@@ -6559,7 +6529,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			inet_csk_reset_keepalive_timer(sk, tmo);
 		} else {
 			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_TCPABORTONDATA, "Tcp fin wait2");
 			goto end;
 		}
 		break;
@@ -6568,7 +6538,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_CLOSING:
 		if (tp->snd_una == tp->write_seq) {
 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_TIMEWAITED, "Tcp time wait");
 			goto end;
 		}
 		break;
@@ -6577,7 +6547,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->snd_una == tp->write_seq) {
 			tcp_update_metrics(sk);
 			tcp_done(sk);
-			tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+			tcp_drop(sk, skb, LINUX_MIB_TCPPUREACKS, "Tcp last ack");
 			goto end;
 		}
 		break;
@@ -6596,7 +6566,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			 * continue to be processed, drop the packet.
 			 */
 			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
-				tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+				tcp_drop(sk, skb, LINUX_MIB_TCPPUREACKS, "Tcp subflow been reset");
 				goto end;
 			}
 			break;
@@ -6630,7 +6600,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (!queued) {
-		tcp_drop(sk, skb, TCP_DROP_MASK(__LINE__, TCP_RCV_STATE_PROCESS));
+		tcp_drop(sk, skb, LINUX_MIB_TCPOFOQUEUE, "Tcp rcv synsent state process");
 	}
 
 end:
-- 
2.25.1



* Re: [PATCH] net: tcp_drop adds `reason` and SNMP parameters for tracing v3
@ 2021-09-02 15:44 ` Eric Dumazet
From: Eric Dumazet @ 2021-09-02 15:44 UTC (permalink / raw)
  To: Zhongya Yan
  Cc: netdev, LKML, Jakub Kicinski, Steven Rostedt, Ingo Molnar,
	David Miller, Hideaki YOSHIFUJI, David Ahern, hengqi.chen,
	Yonghong Song, Brendan Gregg, 2228598786

On Thu, Sep 2, 2021 at 2:34 AM Zhongya Yan <yan2228598786@gmail.com> wrote:
>
> I used the suggestion from `Brendan Gregg`. In addition to the
> `reason` parameter there is now a `field` parameter pointing to the
> corresponding `SNMP` counter, to distinguish the cause of each
> `tcp_drop`. I know what I have submitted is not yet accurate, so I am
> posting the current patch to get comments and criticism from everyone,
> so that I can submit better code and solutions, and of course become
> more familiar with the `linux` kernel network code.
> Thank you, everyone!
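>
> For illustration, once the patch is applied the event can be consumed
> through tracefs. This is a minimal usage sketch, not verified output:
> the tracefs mount point and the abbreviated trace line are assumptions
> on my part.
>
>     # enable the tracepoint defined by TRACE_EVENT(tcp_drop) below
>     # (tracefs assumed mounted at /sys/kernel/tracing)
>     echo 1 > /sys/kernel/tracing/events/tcp/tcp_drop/enable
>     # each drop then logs one line carrying the SNMP field index and
>     # the reason string, e.g. "... field=<n> reason=Tcp rmem failed"
>     cat /sys/kernel/tracing/trace_pipe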
>
> Signed-off-by: Zhongya Yan <yan2228598786@gmail.com>
> ---
>  include/trace/events/tcp.h |  39 +++---------
>  net/ipv4/tcp_input.c       | 126 ++++++++++++++-----------------------
>  2 files changed, 57 insertions(+), 108 deletions(-)
>
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 699539702ea9..80892660458e 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -371,28 +371,10 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
>         TP_ARGS(skb)
>  );
>
> -// from author @{Steven Rostedt}
> -#define TCP_DROP_REASON                                                        \
> -       REASON_STRING(TCP_OFO_QUEUE,            ofo_queue)                      \
> -       REASON_STRING(TCP_DATA_QUEUE_OFO,               data_queue_ofo)                 \
> -       REASON_STRING(TCP_DATA_QUEUE,           data_queue)                     \
> -       REASON_STRING(TCP_PRUNE_OFO_QUEUE,              prune_ofo_queue)                \
> -       REASON_STRING(TCP_VALIDATE_INCOMING,    validate_incoming)              \
> -       REASON_STRING(TCP_RCV_ESTABLISHED,              rcv_established)                \
> -       REASON_STRING(TCP_RCV_SYNSENT_STATE_PROCESS, rcv_synsent_state_process) \
> -       REASON_STRINGe(TCP_RCV_STATE_PROCESS,   rcv_state_proces)

??? Which tree / branch is this patch based on?

> -
> -#undef REASON_STRING
> -#undef REASON_STRINGe
> -
> -#define REASON_STRING(code, name) {code , #name},
> -#define REASON_STRINGe(code, name) {code, #name}
> -
> -
>  TRACE_EVENT(tcp_drop,
> -               TP_PROTO(struct sock *sk, struct sk_buff *skb, __u32 reason),
> +               TP_PROTO(struct sock *sk, struct sk_buff *skb, int field, const char *reason),
>
> -               TP_ARGS(sk, skb, reason),
> +               TP_ARGS(sk, skb, field, reason),
>
>                 TP_STRUCT__entry(
>                         __array(__u8, saddr, sizeof(struct sockaddr_in6))
> @@ -409,9 +391,8 @@ TRACE_EVENT(tcp_drop,
>                         __field(__u32, srtt)
>                         __field(__u32, rcv_wnd)
>                         __field(__u64, sock_cookie)
> -                       __field(__u32, reason)
> -                       __field(__u32, reason_code)
> -                       __field(__u32, reason_line)
> +                       __field(int, field)
> +                       __string(reason, reason)
>                         ),
>
>                 TP_fast_assign(
> @@ -437,21 +418,19 @@ TRACE_EVENT(tcp_drop,
>                                 __entry->ssthresh = tcp_current_ssthresh(sk);
>                                 __entry->srtt = tp->srtt_us >> 3;
>                                 __entry->sock_cookie = sock_gen_cookie(sk);
> -                               __entry->reason = reason;
> -                               __entry->reason_code = TCP_DROP_CODE(reason);
> -                               __entry->reason_line = TCP_DROP_LINE(reason);
> +                               __entry->field = field;
> +
> +                               __assign_str(reason, reason);
>                 ),
>
>                 TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x \
>                                 snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u \
> -                               sock_cookie=%llx reason=%d reason_type=%s reason_line=%d",
> +                               sock_cookie=%llx field=%d reason=%s",
>                                 __entry->saddr, __entry->daddr, __entry->mark,
>                                 __entry->data_len, __entry->snd_nxt, __entry->snd_una,
>                                 __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
>                                 __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
> -                               __entry->reason,
> -                               __print_symbolic(__entry->reason_code, TCP_DROP_REASON),
> -                               __entry->reason_line)
> +                               field, __get_str(reason))
>  );
>
>  #endif /* _TRACE_TCP_H */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index b2bc49f1f0de..bd33fd466e1e 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -100,7 +100,6 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>  #define FLAG_UPDATE_TS_RECENT  0x4000 /* tcp_replace_ts_recent() */
>  #define FLAG_NO_CHALLENGE_ACK  0x8000 /* do not call tcp_send_challenge_ack()  */
>  #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
> -#define FLAG_DSACK_TLP         0x20000 /* DSACK for tail loss probe */
>
>  #define FLAG_ACKED             (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
>  #define FLAG_NOT_DUP           (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
> @@ -455,12 +454,11 @@ static void tcp_sndbuf_expand(struct sock *sk)
>   */
>
>  /* Slow part of check#2. */
> -static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
> -                            unsigned int skbtruesize)
> +static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)

???

>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>         /* Optimize this! */
> -       int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
> +       int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;

???

>         int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
>
>         while (tp->rcv_ssthresh <= window) {
> @@ -473,27 +471,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
>         return 0;
>  }
>
> -/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
> - * can play nice with us, as sk_buff and skb->head might be either
> - * freed or shared with up to MAX_SKB_FRAGS segments.
> - * Only give a boost to drivers using page frag(s) to hold the frame(s),
> - * and if no payload was pulled in skb->head before reaching us.
> - */
> -static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
> -{
> -       u32 truesize = skb->truesize;
> -
> -       if (adjust && !skb_headlen(skb)) {
> -               truesize -= SKB_TRUESIZE(skb_end_offset(skb));
> -               /* paranoid check, some drivers might be buggy */
> -               if (unlikely((int)truesize < (int)skb->len))
> -                       truesize = skb->truesize;
> -       }
> -       return truesize;
> -}

It seems clear you are doing something wrong here.

Have you silently reverted a prior patch?
