linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
@ 2017-11-09  6:01 Yafang Shao
  2017-11-09  6:43 ` Alexei Starovoitov
  0 siblings, 1 reply; 15+ messages in thread
From: Yafang Shao @ 2017-11-09  6:01 UTC (permalink / raw)
  To: davem; +Cc: kuznet, yoshfuji, rostedt, mingo, netdev, linux-kernel, Yafang Shao

With this newly introduced TRACE_EVENT, it will be very easy to minotor
TCP/IPv4 state transition.

A new TRACE_SYSTEM named tcp is added, in which we can trace other TCP
event as well.

Two helpers are added,
static inline void __tcp_set_state(struct sock *sk, int state)
static inline void __sk_state_store(struct sock *sk, int newstate)

When do TCP/IPv4 state transition, we should use these two helpers or
use tcp_set_state() instead of assign a value to sk_state directly.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/tcp.h               | 16 ++++++++++++
 include/trace/events/tcp.h      | 58 +++++++++++++++++++++++++++++++++++++++++
 net/ipv4/inet_connection_sock.c |  9 ++++---
 net/ipv4/inet_hashtables.c      |  2 +-
 net/ipv4/tcp.c                  |  2 +-
 5 files changed, 82 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/tcp.h

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 89974c5..a8336d3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -49,6 +49,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/bpf-cgroup.h>
+#include <trace/events/tcp.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -1284,6 +1285,21 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
 #endif
 void tcp_set_state(struct sock *sk, int state);
 
+/*
+ * To trace TCP state transition.
+ */
+static inline void __tcp_set_state(struct sock *sk, int state)
+{
+	trace_tcp_set_state(sk, sk->sk_state, state);
+	sk->sk_state = state;
+}
+
+static inline void __sk_state_store(struct sock *sk, int newstate)
+{
+	trace_tcp_set_state(sk, sk->sk_state, newstate);
+	sk_state_store(sk, newstate);
+}
+
 void tcp_done(struct sock *sk);
 
 int tcp_abort(struct sock *sk, int err);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 0000000..abf65af
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,58 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/tracepoint.h>
+#include <net/sock.h>
+#include <net/inet_timewait_sock.h>
+#include <net/request_sock.h>
+#include <net/inet_sock.h>
+#include <net/tcp_states.h>
+
+TRACE_EVENT(tcp_set_state,
+	TP_PROTO(struct sock *sk, int oldstate, int newstate),
+	TP_ARGS(sk, oldstate, newstate),
+
+	TP_STRUCT__entry(
+		__field(__be32, dst)
+		__field(__be32, src)
+		__field(__u16, dport)
+		__field(__u16, sport)
+		__field(int, oldstate)
+		__field(int, newstate)
+	),
+
+	TP_fast_assign(
+		if (oldstate == TCP_TIME_WAIT) {
+			__entry->dst = inet_twsk(sk)->tw_daddr;
+			__entry->src = inet_twsk(sk)->tw_rcv_saddr;
+			__entry->dport = ntohs(inet_twsk(sk)->tw_dport);
+			__entry->sport = ntohs(inet_twsk(sk)->tw_sport);
+		} else if (oldstate == TCP_NEW_SYN_RECV) {
+			__entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
+			__entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
+			__entry->dport =
+				ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
+			__entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
+		} else {
+			__entry->dst = inet_sk(sk)->inet_daddr;
+			__entry->src = inet_sk(sk)->inet_rcv_saddr;
+			__entry->dport = ntohs(inet_sk(sk)->inet_dport);
+			__entry->sport = ntohs(inet_sk(sk)->inet_sport);
+		}
+
+		__entry->oldstate = oldstate;
+		__entry->newstate = newstate;
+	),
+
+	TP_printk("%08X:%04X %08X:%04X, %02x %02x",
+		__entry->src, __entry->sport, __entry->dst, __entry->dport,
+		__entry->oldstate, __entry->newstate)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c039c93..307a046 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -27,6 +27,9 @@
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/tcp.h>
+
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -786,7 +789,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-		newsk->sk_state = TCP_SYN_RECV;
+		__tcp_set_state(newsk, TCP_SYN_RECV);
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -880,7 +883,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk_state_store(sk, TCP_LISTEN);
+	__sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
@@ -891,7 +894,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 			return 0;
 	}
 
-	sk->sk_state = TCP_CLOSE;
+	__tcp_set_state(sk, TCP_CLOSE);
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 597bb4c..0f45d456 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -430,7 +430,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	} else {
 		percpu_counter_inc(sk->sk_prot->orphan_count);
-		sk->sk_state = TCP_CLOSE;
+		__tcp_set_state(sk, TCP_CLOSE);
 		sock_set_flag(sk, SOCK_DEAD);
 		inet_csk_destroy_sock(sk);
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402..984dce6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2040,7 +2040,7 @@ void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk_state_store(sk, state);
+	__sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09  6:01 [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition Yafang Shao
@ 2017-11-09  6:43 ` Alexei Starovoitov
  2017-11-09  6:50   ` Yafang Shao
  2017-11-09 18:18   ` Steven Rostedt
  0 siblings, 2 replies; 15+ messages in thread
From: Alexei Starovoitov @ 2017-11-09  6:43 UTC (permalink / raw)
  To: Yafang Shao; +Cc: davem, rostedt, mingo, netdev, linux-kernel, songliubraving

On Thu, Nov 09, 2017 at 02:01:38PM +0800, Yafang Shao wrote:
> With this newly introduced TRACE_EVENT, it will be very easy to minotor
> TCP/IPv4 state transition.
> 
> A new TRACE_SYSTEM named tcp is added, in which we can trace other TCP
> event as well.
> 
> Two helpers are added,
> static inline void __tcp_set_state(struct sock *sk, int state)
> static inline void __sk_state_store(struct sock *sk, int newstate)
> 
> When do TCP/IPv4 state transition, we should use these two helpers or
> use tcp_set_state() instead of assign a value to sk_state directly.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

when you submit a patch pls make it clear which tree this patch is targeting.
In this case it should have been net-next,
but the patch clearly conflicts with it.
Make sure to rebase.

> +/*
> + * To trace TCP state transition.
> + */
> +static inline void __tcp_set_state(struct sock *sk, int state)
> +{
> +	trace_tcp_set_state(sk, sk->sk_state, state);
> +	sk->sk_state = state;
> +}
> +
> +static inline void __sk_state_store(struct sock *sk, int newstate)
> +{
> +	trace_tcp_set_state(sk, sk->sk_state, newstate);
> +	sk_state_store(sk, newstate);
> +}
> +
>  void tcp_done(struct sock *sk);
>  
>  int tcp_abort(struct sock *sk, int err);
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> new file mode 100644
> index 0000000..abf65af
> --- /dev/null
> +++ b/include/trace/events/tcp.h
> @@ -0,0 +1,58 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM tcp
> +
> +#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_TCP_H
> +
> +#include <linux/tracepoint.h>
> +#include <net/sock.h>
> +#include <net/inet_timewait_sock.h>
> +#include <net/request_sock.h>
> +#include <net/inet_sock.h>
> +#include <net/tcp_states.h>
> +
> +TRACE_EVENT(tcp_set_state,
> +	TP_PROTO(struct sock *sk, int oldstate, int newstate),
> +	TP_ARGS(sk, oldstate, newstate),
> +
> +	TP_STRUCT__entry(
> +		__field(__be32, dst)
> +		__field(__be32, src)
> +		__field(__u16, dport)
> +		__field(__u16, sport)
> +		__field(int, oldstate)
> +		__field(int, newstate)
> +	),
> +
> +	TP_fast_assign(
> +		if (oldstate == TCP_TIME_WAIT) {
> +			__entry->dst = inet_twsk(sk)->tw_daddr;
> +			__entry->src = inet_twsk(sk)->tw_rcv_saddr;
> +			__entry->dport = ntohs(inet_twsk(sk)->tw_dport);
> +			__entry->sport = ntohs(inet_twsk(sk)->tw_sport);
> +		} else if (oldstate == TCP_NEW_SYN_RECV) {
> +			__entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
> +			__entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
> +			__entry->dport =
> +				ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
> +			__entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
> +		} else {
> +			__entry->dst = inet_sk(sk)->inet_daddr;
> +			__entry->src = inet_sk(sk)->inet_rcv_saddr;
> +			__entry->dport = ntohs(inet_sk(sk)->inet_dport);
> +			__entry->sport = ntohs(inet_sk(sk)->inet_sport);
> +		}
> +
> +		__entry->oldstate = oldstate;
> +		__entry->newstate = newstate;
> +	),
> +
> +	TP_printk("%08X:%04X %08X:%04X, %02x %02x",
> +		__entry->src, __entry->sport, __entry->dst, __entry->dport,
> +		__entry->oldstate, __entry->newstate)

direct %x of state is not allowed.
This has to use show_tcp_state_name() like it's done in trace_tcp_set_state

Also I'm missing the reason to introduce another tracepoint
that looks just like trace_tcp_set_state.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09  6:43 ` Alexei Starovoitov
@ 2017-11-09  6:50   ` Yafang Shao
  2017-11-09 18:18   ` Steven Rostedt
  1 sibling, 0 replies; 15+ messages in thread
From: Yafang Shao @ 2017-11-09  6:50 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David Miller, rostedt, mingo, netdev, linux-kernel, songliubraving

2017-11-09 14:43 GMT+08:00 Alexei Starovoitov <alexei.starovoitov@gmail.com>:
> On Thu, Nov 09, 2017 at 02:01:38PM +0800, Yafang Shao wrote:
>> With this newly introduced TRACE_EVENT, it will be very easy to minotor
>> TCP/IPv4 state transition.
>>
>> A new TRACE_SYSTEM named tcp is added, in which we can trace other TCP
>> event as well.
>>
>> Two helpers are added,
>> static inline void __tcp_set_state(struct sock *sk, int state)
>> static inline void __sk_state_store(struct sock *sk, int newstate)
>>
>> When do TCP/IPv4 state transition, we should use these two helpers or
>> use tcp_set_state() instead of assign a value to sk_state directly.
>>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>
> when you submit a patch pls make it clear which tree this patch is targeting.
> In this case it should have been net-next,
> but the patch clearly conflicts with it.
> Make sure to rebase.

I do it based on this tree
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
OK I will check the net-next then.

>
>> +/*
>> + * To trace TCP state transition.
>> + */
>> +static inline void __tcp_set_state(struct sock *sk, int state)
>> +{
>> +     trace_tcp_set_state(sk, sk->sk_state, state);
>> +     sk->sk_state = state;
>> +}
>> +
>> +static inline void __sk_state_store(struct sock *sk, int newstate)
>> +{
>> +     trace_tcp_set_state(sk, sk->sk_state, newstate);
>> +     sk_state_store(sk, newstate);
>> +}
>> +
>>  void tcp_done(struct sock *sk);
>>
>>  int tcp_abort(struct sock *sk, int err);
>> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
>> new file mode 100644
>> index 0000000..abf65af
>> --- /dev/null
>> +++ b/include/trace/events/tcp.h
>> @@ -0,0 +1,58 @@
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM tcp
>> +
>> +#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_TCP_H
>> +
>> +#include <linux/tracepoint.h>
>> +#include <net/sock.h>
>> +#include <net/inet_timewait_sock.h>
>> +#include <net/request_sock.h>
>> +#include <net/inet_sock.h>
>> +#include <net/tcp_states.h>
>> +
>> +TRACE_EVENT(tcp_set_state,
>> +     TP_PROTO(struct sock *sk, int oldstate, int newstate),
>> +     TP_ARGS(sk, oldstate, newstate),
>> +
>> +     TP_STRUCT__entry(
>> +             __field(__be32, dst)
>> +             __field(__be32, src)
>> +             __field(__u16, dport)
>> +             __field(__u16, sport)
>> +             __field(int, oldstate)
>> +             __field(int, newstate)
>> +     ),
>> +
>> +     TP_fast_assign(
>> +             if (oldstate == TCP_TIME_WAIT) {
>> +                     __entry->dst = inet_twsk(sk)->tw_daddr;
>> +                     __entry->src = inet_twsk(sk)->tw_rcv_saddr;
>> +                     __entry->dport = ntohs(inet_twsk(sk)->tw_dport);
>> +                     __entry->sport = ntohs(inet_twsk(sk)->tw_sport);
>> +             } else if (oldstate == TCP_NEW_SYN_RECV) {
>> +                     __entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
>> +                     __entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
>> +                     __entry->dport =
>> +                             ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
>> +                     __entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
>> +             } else {
>> +                     __entry->dst = inet_sk(sk)->inet_daddr;
>> +                     __entry->src = inet_sk(sk)->inet_rcv_saddr;
>> +                     __entry->dport = ntohs(inet_sk(sk)->inet_dport);
>> +                     __entry->sport = ntohs(inet_sk(sk)->inet_sport);
>> +             }
>> +
>> +             __entry->oldstate = oldstate;
>> +             __entry->newstate = newstate;
>> +     ),
>> +
>> +     TP_printk("%08X:%04X %08X:%04X, %02x %02x",
>> +             __entry->src, __entry->sport, __entry->dst, __entry->dport,
>> +             __entry->oldstate, __entry->newstate)
>
> direct %x of state is not allowed.
> This has to use show_tcp_state_name() like it's done in trace_tcp_set_state
>
> Also I'm missing the reason to introduce another tracepoint
> that looks just like trace_tcp_set_state.
>

I will check net-next

Thanks
Yafang

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09  6:43 ` Alexei Starovoitov
  2017-11-09  6:50   ` Yafang Shao
@ 2017-11-09 18:18   ` Steven Rostedt
  2017-11-09 18:34     ` Song Liu
  1 sibling, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2017-11-09 18:18 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Yafang Shao, davem, mingo, netdev, linux-kernel, songliubraving

On Thu, 9 Nov 2017 15:43:29 +0900
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> > +TRACE_EVENT(tcp_set_state,
> > +	TP_PROTO(struct sock *sk, int oldstate, int newstate),
> > +	TP_ARGS(sk, oldstate, newstate),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(__be32, dst)
> > +		__field(__be32, src)
> > +		__field(__u16, dport)
> > +		__field(__u16, sport)
> > +		__field(int, oldstate)
> > +		__field(int, newstate)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		if (oldstate == TCP_TIME_WAIT) {
> > +			__entry->dst = inet_twsk(sk)->tw_daddr;
> > +			__entry->src = inet_twsk(sk)->tw_rcv_saddr;
> > +			__entry->dport = ntohs(inet_twsk(sk)->tw_dport);
> > +			__entry->sport = ntohs(inet_twsk(sk)->tw_sport);
> > +		} else if (oldstate == TCP_NEW_SYN_RECV) {
> > +			__entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
> > +			__entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
> > +			__entry->dport =
> > +				ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
> > +			__entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
> > +		} else {
> > +			__entry->dst = inet_sk(sk)->inet_daddr;
> > +			__entry->src = inet_sk(sk)->inet_rcv_saddr;
> > +			__entry->dport = ntohs(inet_sk(sk)->inet_dport);
> > +			__entry->sport = ntohs(inet_sk(sk)->inet_sport);
> > +		}
> > +
> > +		__entry->oldstate = oldstate;
> > +		__entry->newstate = newstate;
> > +	),
> > +
> > +	TP_printk("%08X:%04X %08X:%04X, %02x %02x",
> > +		__entry->src, __entry->sport, __entry->dst, __entry->dport,
> > +		__entry->oldstate, __entry->newstate)  
> 
> direct %x of state is not allowed.
> This has to use show_tcp_state_name() like it's done in trace_tcp_set_state

Hmm, I need to look at trace_tcp_set_state. I'm guessing it is in
net-next too?

> 
> Also I'm missing the reason to introduce another tracepoint
> that looks just like trace_tcp_set_state.

I want to make sure that perf and trace-cmd can parse the TP_printk(),
if it is having helper functions like that in the TP_printk() part,
then the libtraceevent needs to be aware of that.

-- Steve

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09 18:18   ` Steven Rostedt
@ 2017-11-09 18:34     ` Song Liu
  2017-11-09 23:40       ` Song Liu
  0 siblings, 1 reply; 15+ messages in thread
From: Song Liu @ 2017-11-09 18:34 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Alexei Starovoitov, Yafang Shao, davem, mingo, netdev, linux-kernel


> On Nov 9, 2017, at 10:18 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> On Thu, 9 Nov 2017 15:43:29 +0900
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
>>> +TRACE_EVENT(tcp_set_state,
>>> +	TP_PROTO(struct sock *sk, int oldstate, int newstate),
>>> +	TP_ARGS(sk, oldstate, newstate),
>>> +
>>> +	TP_STRUCT__entry(
>>> +		__field(__be32, dst)
>>> +		__field(__be32, src)
>>> +		__field(__u16, dport)
>>> +		__field(__u16, sport)
>>> +		__field(int, oldstate)
>>> +		__field(int, newstate)
>>> +	),
>>> +
>>> +	TP_fast_assign(
>>> +		if (oldstate == TCP_TIME_WAIT) {
>>> +			__entry->dst = inet_twsk(sk)->tw_daddr;
>>> +			__entry->src = inet_twsk(sk)->tw_rcv_saddr;
>>> +			__entry->dport = ntohs(inet_twsk(sk)->tw_dport);
>>> +			__entry->sport = ntohs(inet_twsk(sk)->tw_sport);
>>> +		} else if (oldstate == TCP_NEW_SYN_RECV) {
>>> +			__entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
>>> +			__entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
>>> +			__entry->dport =
>>> +				ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
>>> +			__entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
>>> +		} else {
>>> +			__entry->dst = inet_sk(sk)->inet_daddr;
>>> +			__entry->src = inet_sk(sk)->inet_rcv_saddr;
>>> +			__entry->dport = ntohs(inet_sk(sk)->inet_dport);
>>> +			__entry->sport = ntohs(inet_sk(sk)->inet_sport);
>>> +		}
>>> +
>>> +		__entry->oldstate = oldstate;
>>> +		__entry->newstate = newstate;
>>> +	),
>>> +
>>> +	TP_printk("%08X:%04X %08X:%04X, %02x %02x",
>>> +		__entry->src, __entry->sport, __entry->dst, __entry->dport,
>>> +		__entry->oldstate, __entry->newstate)  
>> 
>> direct %x of state is not allowed.
>> This has to use show_tcp_state_name() like it's done in trace_tcp_set_state
> 
> Hmm, I need to look at trace_tcp_set_state. I'm guessing it is in
> net-next too?

Yes, in net-next. There are 6 tracepoints under tcp group:

  tcp_destroy_sock  
  tcp_receive_reset  
  tcp_retransmit_skb  
  tcp_retransmit_synack  
  tcp_send_reset  
  tcp_set_state

They are all added recently.
 
> 
>> 
>> Also I'm missing the reason to introduce another tracepoint
>> that looks just like trace_tcp_set_state.
> 
> I want to make sure that perf and trace-cmd can parse the TP_printk(),
> if it is having helper functions like that in the TP_printk() part,
> then the libtraceevent needs to be aware of that.
> 

tcp_set_state uses __print_symbolic to show state in text format. I found
trace-cmd cannot parse that part:

[011] 147338.660560: tcp_set_state:        sport=16262 dport=48346 \
    saddr=127.0.0.6 daddr=127.0.0.6 saddrv6=2401:db00:30:317e:face:0:1f:0 \
    daddrv6=2401:db00:30:31e5:face:0:7f:0 oldstate= newstate=

Other parts of the output looks good to me.

Thanks,
Song

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09 18:34     ` Song Liu
@ 2017-11-09 23:40       ` Song Liu
  2017-11-10  0:42         ` Steven Rostedt
  2017-11-10  0:57         ` [PATCH] tcp: Export to userspace the TCP state names for the trace events Steven Rostedt
  0 siblings, 2 replies; 15+ messages in thread
From: Song Liu @ 2017-11-09 23:40 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Alexei Starovoitov, Yafang Shao, David Miller, mingo, netdev,
	linux-kernel


> On Nov 9, 2017, at 10:34 AM, Song Liu <songliubraving@fb.com> wrote:
> 
>> 
>> On Nov 9, 2017, at 10:18 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
>> 
>> On Thu, 9 Nov 2017 15:43:29 +0900
>> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>> 
>>>> +TRACE_EVENT(tcp_set_state,
>>>> +	TP_PROTO(struct sock *sk, int oldstate, int newstate),
>>>> +	TP_ARGS(sk, oldstate, newstate),
>>>> +
>>>> +	TP_STRUCT__entry(
>>>> +		__field(__be32, dst)
>>>> +		__field(__be32, src)
>>>> +		__field(__u16, dport)
>>>> +		__field(__u16, sport)
>>>> +		__field(int, oldstate)
>>>> +		__field(int, newstate)
>>>> +	),
>>>> +
>>>> +	TP_fast_assign(
>>>> +		if (oldstate == TCP_TIME_WAIT) {
>>>> +			__entry->dst = inet_twsk(sk)->tw_daddr;
>>>> +			__entry->src = inet_twsk(sk)->tw_rcv_saddr;
>>>> +			__entry->dport = ntohs(inet_twsk(sk)->tw_dport);
>>>> +			__entry->sport = ntohs(inet_twsk(sk)->tw_sport);
>>>> +		} else if (oldstate == TCP_NEW_SYN_RECV) {
>>>> +			__entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
>>>> +			__entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
>>>> +			__entry->dport =
>>>> +				ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
>>>> +			__entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
>>>> +		} else {
>>>> +			__entry->dst = inet_sk(sk)->inet_daddr;
>>>> +			__entry->src = inet_sk(sk)->inet_rcv_saddr;
>>>> +			__entry->dport = ntohs(inet_sk(sk)->inet_dport);
>>>> +			__entry->sport = ntohs(inet_sk(sk)->inet_sport);
>>>> +		}
>>>> +
>>>> +		__entry->oldstate = oldstate;
>>>> +		__entry->newstate = newstate;
>>>> +	),
>>>> +
>>>> +	TP_printk("%08X:%04X %08X:%04X, %02x %02x",
>>>> +		__entry->src, __entry->sport, __entry->dst, __entry->dport,
>>>> +		__entry->oldstate, __entry->newstate)  
>>> 
>>> direct %x of state is not allowed.
>>> This has to use show_tcp_state_name() like it's done in trace_tcp_set_state
>> 
>> Hmm, I need to look at trace_tcp_set_state. I'm guessing it is in
>> net-next too?
> 
> Yes, in net-next. There are 6 tracepoints under tcp group:
> 
>  tcp_destroy_sock  
>  tcp_receive_reset  
>  tcp_retransmit_skb  
>  tcp_retransmit_synack  
>  tcp_send_reset  
>  tcp_set_state
> 
> They are all added recently.
> 
>> 
>>> 
>>> Also I'm missing the reason to introduce another tracepoint
>>> that looks just like trace_tcp_set_state.
>> 
>> I want to make sure that perf and trace-cmd can parse the TP_printk(),
>> if it is having helper functions like that in the TP_printk() part,
>> then the libtraceevent needs to be aware of that.
>> 
> 
> tcp_set_state uses __print_symbolic to show state in text format. I found
> trace-cmd cannot parse that part:
> 
> [011] 147338.660560: tcp_set_state:        sport=16262 dport=48346 \
>    saddr=127.0.0.6 daddr=127.0.0.6 saddrv6=2401:db00:30:317e:face:0:1f:0 \
>    daddrv6=2401:db00:30:31e5:face:0:7f:0 oldstate= newstate=
> 
> Other parts of the output looks good to me.
> 
> Thanks,
> Song

I am not sure whether this is the best approach, but the following patch 
fixes the output of perf:

     0.44%  sport=16262 dport=39362 saddr=127.0.0.6 daddr=127.0.0.6 \
saddrv6=2401:db00:30:317e:face:0:1f:0 daddrv6=2401:db00:30:31e5:face:0:7f:0 \
oldstate=TCP_CLOSE_WAIT newstate=TCP_LAST_ACK

Thanks,
Song


>From 4b7e27631a4c7df96a38223a95ee3ede2f5f2d19 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 9 Nov 2017 15:30:07 -0800
Subject: [PATCH] libtraceevent: add flags for tcp state names

Names of TCP states are added to flags in event-parse.c.

The names are used to print symbolic names in tracepoint:
tcp/tcp_set_state.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/lib/traceevent/event-parse.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 7ce724f..4972dc2 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3790,6 +3790,20 @@ static const struct flag flags[] = {

        { "HRTIMER_NORESTART", 0 },
        { "HRTIMER_RESTART", 1 },
+
+       /* tcp state names, see include/net/tcp_states.h */
+       { "TCP_ESTABLISHED", 1 },
+       { "TCP_SYN_SENT", 2 },
+       { "TCP_SYN_RECV", 3 },
+       { "TCP_FIN_WAIT1", 4 },
+       { "TCP_FIN_WAIT2", 5 },
+       { "TCP_TIME_WAIT", 6 },
+       { "TCP_CLOSE", 7 },
+       { "TCP_CLOSE_WAIT", 8 },
+       { "TCP_LAST_ACK", 9 },
+       { "TCP_LISTEN", 10 },
+       { "TCP_CLOSING", 11 },
+       { "TCP_NEW_SYN_RECV", 12 },
 };

 static long long eval_flag(const char *flag)
--
2.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition
  2017-11-09 23:40       ` Song Liu
@ 2017-11-10  0:42         ` Steven Rostedt
  2017-11-10  0:57         ` [PATCH] tcp: Export to userspace the TCP state names for the trace events Steven Rostedt
  1 sibling, 0 replies; 15+ messages in thread
From: Steven Rostedt @ 2017-11-10  0:42 UTC (permalink / raw)
  To: Song Liu
  Cc: Alexei Starovoitov, Yafang Shao, David Miller, mingo, netdev,
	linux-kernel

On Thu, 9 Nov 2017 23:40:13 +0000
Song Liu <songliubraving@fb.com> wrote:

> > tcp_set_state uses __print_symbolic to show state in text format. I found
> > trace-cmd cannot parse that part:
> > 
> > [011] 147338.660560: tcp_set_state:        sport=16262 dport=48346 \
> >    saddr=127.0.0.6 daddr=127.0.0.6 saddrv6=2401:db00:30:317e:face:0:1f:0 \
> >    daddrv6=2401:db00:30:31e5:face:0:7f:0 oldstate= newstate=

The latest trace-cmd does show oldstate=0xa newstate=0x7, since I fixed
it so undefined symbols and flags are displayed.

> > 
> > Other parts of the output looks good to me.
> > 
> > Thanks,
> > Song  
> 
> I am not sure whether this is the best approach, but the following patch 
> fixes the output of perf:

No it's not the best approach. But the below patch is ;-)

> 
>      0.44%  sport=16262 dport=39362 saddr=127.0.0.6 daddr=127.0.0.6 \
> saddrv6=2401:db00:30:317e:face:0:1f:0 daddrv6=2401:db00:30:31e5:face:0:7f:0 \
> oldstate=TCP_CLOSE_WAIT newstate=TCP_LAST_ACK
> 

I'll send a formal patch if you all approve.

-- Steve

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1..62e5bad7901f 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,21 +9,36 @@
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
 
+#define tcp_state_names				\
+	EM(TCP_ESTABLISHED)			\
+	EM(TCP_SYN_SENT)			\
+	EM(TCP_SYN_RECV)			\
+	EM(TCP_FIN_WAIT1)			\
+	EM(TCP_FIN_WAIT2)			\
+	EM(TCP_TIME_WAIT)			\
+	EM(TCP_CLOSE)				\
+	EM(TCP_CLOSE_WAIT)			\
+	EM(TCP_LAST_ACK)			\
+	EM(TCP_LISTEN)				\
+	EM(TCP_CLOSING)				\
+	EMe(TCP_NEW_SYN_RECV)
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a)         TRACE_DEFINE_ENUM(a);
+#define EMe(a)        TRACE_DEFINE_ENUM(a);
+
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a)         tcp_state_name(a),
+#define EMe(a)        tcp_state_name(a)
+
 #define tcp_state_name(state)	{ state, #state }
 #define show_tcp_state_name(val)			\
-	__print_symbolic(val,				\
-		tcp_state_name(TCP_ESTABLISHED),	\
-		tcp_state_name(TCP_SYN_SENT),		\
-		tcp_state_name(TCP_SYN_RECV),		\
-		tcp_state_name(TCP_FIN_WAIT1),		\
-		tcp_state_name(TCP_FIN_WAIT2),		\
-		tcp_state_name(TCP_TIME_WAIT),		\
-		tcp_state_name(TCP_CLOSE),		\
-		tcp_state_name(TCP_CLOSE_WAIT),		\
-		tcp_state_name(TCP_LAST_ACK),		\
-		tcp_state_name(TCP_LISTEN),		\
-		tcp_state_name(TCP_CLOSING),		\
-		tcp_state_name(TCP_NEW_SYN_RECV))
+	__print_symbolic(val, tcp_state_names)
 
 /*
  * tcp event with arguments sk and skb

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-09 23:40       ` Song Liu
  2017-11-10  0:42         ` Steven Rostedt
@ 2017-11-10  0:57         ` Steven Rostedt
  2017-11-10  1:42           ` Song Liu
  2017-11-10  4:56           ` Yafang Shao
  1 sibling, 2 replies; 15+ messages in thread
From: Steven Rostedt @ 2017-11-10  0:57 UTC (permalink / raw)
  To: David Miller
  Cc: Song Liu, Alexei Starovoitov, Yafang Shao, mingo, netdev, linux-kernel


From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

The TCP trace events (specifically tcp_set_state), maps emums to symbol
names via __print_symbolic(). But this only works for reading trace events
from the tracefs trace files. If perf or trace-cmd were to record these
events, the event format file does not convert the enum names into numbers,
and you get something like:

__print_symbolic(REC->oldstate,
    { TCP_ESTABLISHED, "TCP_ESTABLISHED" },
    { TCP_SYN_SENT, "TCP_SYN_SENT" },
    { TCP_SYN_RECV, "TCP_SYN_RECV" },
    { TCP_FIN_WAIT1, "TCP_FIN_WAIT1" },
    { TCP_FIN_WAIT2, "TCP_FIN_WAIT2" },
    { TCP_TIME_WAIT, "TCP_TIME_WAIT" },
    { TCP_CLOSE, "TCP_CLOSE" },
    { TCP_CLOSE_WAIT, "TCP_CLOSE_WAIT" },
    { TCP_LAST_ACK, "TCP_LAST_ACK" },
    { TCP_LISTEN, "TCP_LISTEN" },
    { TCP_CLOSING, "TCP_CLOSING" },
    { TCP_NEW_SYN_RECV, "TCP_NEW_SYN_RECV" })

Where trace-cmd and perf do not know the values of those enums.

Use the TRACE_DEFINE_ENUM() macros that will have the trace events convert
the enum strings into their values at system boot. This will allow perf and
trace-cmd to see actual numbers and not enums:

__print_symbolic(REC->oldstate,
    { 1, "TCP_ESTABLISHED" },
    { 2, "TCP_SYN_SENT" },
    { 3, "TCP_SYN_RECV" },
    { 4, "TCP_FIN_WAIT1" },
    { 5, "TCP_FIN_WAIT2" },
    { 6, "TCP_TIME_WAIT" },
    { 7, "TCP_CLOSE" },
    { 8, "TCP_CLOSE_WAIT" },
    { 9, "TCP_LAST_ACK" },
    { 10, "TCP_LISTEN" },
    { 11, "TCP_CLOSING" },
    { 12, "TCP_NEW_SYN_RECV" })

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/tcp.h | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca6cbf1..62e5bad7901f 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,21 +9,36 @@
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
 
+#define tcp_state_names				\
+	EM(TCP_ESTABLISHED)			\
+	EM(TCP_SYN_SENT)			\
+	EM(TCP_SYN_RECV)			\
+	EM(TCP_FIN_WAIT1)			\
+	EM(TCP_FIN_WAIT2)			\
+	EM(TCP_TIME_WAIT)			\
+	EM(TCP_CLOSE)				\
+	EM(TCP_CLOSE_WAIT)			\
+	EM(TCP_LAST_ACK)			\
+	EM(TCP_LISTEN)				\
+	EM(TCP_CLOSING)				\
+	EMe(TCP_NEW_SYN_RECV)
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a)         TRACE_DEFINE_ENUM(a);
+#define EMe(a)        TRACE_DEFINE_ENUM(a);
+
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a)         tcp_state_name(a),
+#define EMe(a)        tcp_state_name(a)
+
 #define tcp_state_name(state)	{ state, #state }
 #define show_tcp_state_name(val)			\
-	__print_symbolic(val,				\
-		tcp_state_name(TCP_ESTABLISHED),	\
-		tcp_state_name(TCP_SYN_SENT),		\
-		tcp_state_name(TCP_SYN_RECV),		\
-		tcp_state_name(TCP_FIN_WAIT1),		\
-		tcp_state_name(TCP_FIN_WAIT2),		\
-		tcp_state_name(TCP_TIME_WAIT),		\
-		tcp_state_name(TCP_CLOSE),		\
-		tcp_state_name(TCP_CLOSE_WAIT),		\
-		tcp_state_name(TCP_LAST_ACK),		\
-		tcp_state_name(TCP_LISTEN),		\
-		tcp_state_name(TCP_CLOSING),		\
-		tcp_state_name(TCP_NEW_SYN_RECV))
+	__print_symbolic(val, tcp_state_names)
 
 /*
  * tcp event with arguments sk and skb
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-10  0:57         ` [PATCH] tcp: Export to userspace the TCP state names for the trace events Steven Rostedt
@ 2017-11-10  1:42           ` Song Liu
  2017-11-10  4:56           ` Yafang Shao
  1 sibling, 0 replies; 15+ messages in thread
From: Song Liu @ 2017-11-10  1:42 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: David Miller, Alexei Starovoitov, Yafang Shao, mingo, netdev,
	linux-kernel


> On Nov 9, 2017, at 4:57 PM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> 
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> 
> The TCP trace events (specifically tcp_set_state), maps emums to symbol
> names via __print_symbolic(). But this only works for reading trace events
> from the tracefs trace files. If perf or trace-cmd were to record these
> events, the event format file does not convert the enum names into numbers,
> and you get something like:
> 
> __print_symbolic(REC->oldstate,
>    { TCP_ESTABLISHED, "TCP_ESTABLISHED" },
>    { TCP_SYN_SENT, "TCP_SYN_SENT" },
>    { TCP_SYN_RECV, "TCP_SYN_RECV" },
>    { TCP_FIN_WAIT1, "TCP_FIN_WAIT1" },
>    { TCP_FIN_WAIT2, "TCP_FIN_WAIT2" },
>    { TCP_TIME_WAIT, "TCP_TIME_WAIT" },
>    { TCP_CLOSE, "TCP_CLOSE" },
>    { TCP_CLOSE_WAIT, "TCP_CLOSE_WAIT" },
>    { TCP_LAST_ACK, "TCP_LAST_ACK" },
>    { TCP_LISTEN, "TCP_LISTEN" },
>    { TCP_CLOSING, "TCP_CLOSING" },
>    { TCP_NEW_SYN_RECV, "TCP_NEW_SYN_RECV" })
> 
> Where trace-cmd and perf do not know the values of those enums.
> 
> Use the TRACE_DEFINE_ENUM() macros that will have the trace events convert
> the enum strings into their values at system boot. This will allow perf and
> trace-cmd to see actual numbers and not enums:
> 
> __print_symbolic(REC->oldstate,
>    { 1, "TCP_ESTABLISHED" },
>    { 2, "TCP_SYN_SENT" },
>    { 3, "TCP_SYN_RECV" },
>    { 4, "TCP_FIN_WAIT1" },
>    { 5, "TCP_FIN_WAIT2" },
>    { 6, "TCP_TIME_WAIT" },
>    { 7, "TCP_CLOSE" },
>    { 8, "TCP_CLOSE_WAIT" },
>    { 9, "TCP_LAST_ACK" },
>    { 10, "TCP_LISTEN" },
>    { 11, "TCP_CLOSING" },
>    { 12, "TCP_NEW_SYN_RECV" })
> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
> include/trace/events/tcp.h | 41 ++++++++++++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 07cccca6cbf1..62e5bad7901f 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -9,21 +9,36 @@
> #include <linux/tracepoint.h>
> #include <net/ipv6.h>
> 
> +#define tcp_state_names				\
> +	EM(TCP_ESTABLISHED)			\
> +	EM(TCP_SYN_SENT)			\
> +	EM(TCP_SYN_RECV)			\
> +	EM(TCP_FIN_WAIT1)			\
> +	EM(TCP_FIN_WAIT2)			\
> +	EM(TCP_TIME_WAIT)			\
> +	EM(TCP_CLOSE)				\
> +	EM(TCP_CLOSE_WAIT)			\
> +	EM(TCP_LAST_ACK)			\
> +	EM(TCP_LISTEN)				\
> +	EM(TCP_CLOSING)				\
> +	EMe(TCP_NEW_SYN_RECV)
> +
> +/* enums need to be exported to user space */
> +#undef EM
> +#undef EMe
> +#define EM(a)         TRACE_DEFINE_ENUM(a);
> +#define EMe(a)        TRACE_DEFINE_ENUM(a);
> +
> +tcp_state_names
> +
> +#undef EM
> +#undef EMe
> +#define EM(a)         tcp_state_name(a),
> +#define EMe(a)        tcp_state_name(a)
> +
> #define tcp_state_name(state)	{ state, #state }
> #define show_tcp_state_name(val)			\
> -	__print_symbolic(val,				\
> -		tcp_state_name(TCP_ESTABLISHED),	\
> -		tcp_state_name(TCP_SYN_SENT),		\
> -		tcp_state_name(TCP_SYN_RECV),		\
> -		tcp_state_name(TCP_FIN_WAIT1),		\
> -		tcp_state_name(TCP_FIN_WAIT2),		\
> -		tcp_state_name(TCP_TIME_WAIT),		\
> -		tcp_state_name(TCP_CLOSE),		\
> -		tcp_state_name(TCP_CLOSE_WAIT),		\
> -		tcp_state_name(TCP_LAST_ACK),		\
> -		tcp_state_name(TCP_LISTEN),		\
> -		tcp_state_name(TCP_CLOSING),		\
> -		tcp_state_name(TCP_NEW_SYN_RECV))
> +	__print_symbolic(val, tcp_state_names)
> 
> /*
>  * tcp event with arguments sk and skb
> -- 
> 2.13.6
> 

Reviewed-and-tested-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-10  0:57         ` [PATCH] tcp: Export to userspace the TCP state names for the trace events Steven Rostedt
  2017-11-10  1:42           ` Song Liu
@ 2017-11-10  4:56           ` Yafang Shao
  2017-11-10 15:07             ` Steven Rostedt
  1 sibling, 1 reply; 15+ messages in thread
From: Yafang Shao @ 2017-11-10  4:56 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: David Miller, Song Liu, Alexei Starovoitov, mingo, netdev, linux-kernel

2017-11-10 8:57 GMT+08:00 Steven Rostedt <rostedt@goodmis.org>:
>
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
>
> The TCP trace events (specifically tcp_set_state), maps emums to symbol
> names via __print_symbolic(). But this only works for reading trace events
> from the tracefs trace files. If perf or trace-cmd were to record these
> events, the event format file does not convert the enum names into numbers,
> and you get something like:
>
> __print_symbolic(REC->oldstate,
>     { TCP_ESTABLISHED, "TCP_ESTABLISHED" },
>     { TCP_SYN_SENT, "TCP_SYN_SENT" },
>     { TCP_SYN_RECV, "TCP_SYN_RECV" },
>     { TCP_FIN_WAIT1, "TCP_FIN_WAIT1" },
>     { TCP_FIN_WAIT2, "TCP_FIN_WAIT2" },
>     { TCP_TIME_WAIT, "TCP_TIME_WAIT" },
>     { TCP_CLOSE, "TCP_CLOSE" },
>     { TCP_CLOSE_WAIT, "TCP_CLOSE_WAIT" },
>     { TCP_LAST_ACK, "TCP_LAST_ACK" },
>     { TCP_LISTEN, "TCP_LISTEN" },
>     { TCP_CLOSING, "TCP_CLOSING" },
>     { TCP_NEW_SYN_RECV, "TCP_NEW_SYN_RECV" })
>
> Where trace-cmd and perf do not know the values of those enums.
>
> Use the TRACE_DEFINE_ENUM() macros that will have the trace events convert
> the enum strings into their values at system boot. This will allow perf and
> trace-cmd to see actual numbers and not enums:
>
> __print_symbolic(REC->oldstate,
>     { 1, "TCP_ESTABLISHED" },
>     { 2, "TCP_SYN_SENT" },
>     { 3, "TCP_SYN_RECV" },
>     { 4, "TCP_FIN_WAIT1" },
>     { 5, "TCP_FIN_WAIT2" },
>     { 6, "TCP_TIME_WAIT" },
>     { 7, "TCP_CLOSE" },
>     { 8, "TCP_CLOSE_WAIT" },
>     { 9, "TCP_LAST_ACK" },
>     { 10, "TCP_LISTEN" },
>     { 11, "TCP_CLOSING" },
>     { 12, "TCP_NEW_SYN_RECV" })
>
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>  include/trace/events/tcp.h | 41 ++++++++++++++++++++++++++++-------------
>  1 file changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 07cccca6cbf1..62e5bad7901f 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -9,21 +9,36 @@
>  #include <linux/tracepoint.h>
>  #include <net/ipv6.h>
>
> +#define tcp_state_names                                \
> +       EM(TCP_ESTABLISHED)                     \
> +       EM(TCP_SYN_SENT)                        \
> +       EM(TCP_SYN_RECV)                        \
> +       EM(TCP_FIN_WAIT1)                       \
> +       EM(TCP_FIN_WAIT2)                       \
> +       EM(TCP_TIME_WAIT)                       \
> +       EM(TCP_CLOSE)                           \
> +       EM(TCP_CLOSE_WAIT)                      \
> +       EM(TCP_LAST_ACK)                        \
> +       EM(TCP_LISTEN)                          \
> +       EM(TCP_CLOSING)                         \
> +       EMe(TCP_NEW_SYN_RECV)
> +
> +/* enums need to be exported to user space */
> +#undef EM
> +#undef EMe
> +#define EM(a)         TRACE_DEFINE_ENUM(a);
> +#define EMe(a)        TRACE_DEFINE_ENUM(a);
> +
> +tcp_state_names
> +
> +#undef EM
> +#undef EMe
> +#define EM(a)         tcp_state_name(a),
> +#define EMe(a)        tcp_state_name(a)
> +
>  #define tcp_state_name(state)  { state, #state }
>  #define show_tcp_state_name(val)                       \
> -       __print_symbolic(val,                           \
> -               tcp_state_name(TCP_ESTABLISHED),        \
> -               tcp_state_name(TCP_SYN_SENT),           \
> -               tcp_state_name(TCP_SYN_RECV),           \
> -               tcp_state_name(TCP_FIN_WAIT1),          \
> -               tcp_state_name(TCP_FIN_WAIT2),          \
> -               tcp_state_name(TCP_TIME_WAIT),          \
> -               tcp_state_name(TCP_CLOSE),              \
> -               tcp_state_name(TCP_CLOSE_WAIT),         \
> -               tcp_state_name(TCP_LAST_ACK),           \
> -               tcp_state_name(TCP_LISTEN),             \
> -               tcp_state_name(TCP_CLOSING),            \
> -               tcp_state_name(TCP_NEW_SYN_RECV))
> +       __print_symbolic(val, tcp_state_names)
>
>  /*
>   * tcp event with arguments sk and skb
> --
> 2.13.6
>

Could the macro tcp_state_name() be renamed ?
If <trace/events/tcp.h> is included in include/net/tcp.h, it will
cause compile error, because there's another function tcp_state_name()
defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
    static const char * tcp_state_name(int state)
    {

        if (state >= IP_VS_TCP_S_LAST)

            return "ERR!";

        return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";

    }


Thanks
Yafang

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-10  4:56           ` Yafang Shao
@ 2017-11-10 15:07             ` Steven Rostedt
  2017-11-10 17:37               ` Song Liu
  2017-11-11  2:06               ` Yafang Shao
  0 siblings, 2 replies; 15+ messages in thread
From: Steven Rostedt @ 2017-11-10 15:07 UTC (permalink / raw)
  To: Yafang Shao
  Cc: David Miller, Song Liu, Alexei Starovoitov, mingo, netdev, linux-kernel

On Fri, 10 Nov 2017 12:56:06 +0800
Yafang Shao <laoar.shao@gmail.com> wrote:

> Could the macro tcp_state_name() be renamed ?
> If <trace/events/tcp.h> is included in include/net/tcp.h, it will

Ideally, you don't want to include trace/events/*.h headers in other
headers, as they can have side effects if those headers are included in
other trace/events/*.h headers.

> cause compile error, because there's another function tcp_state_name()
> defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
>     static const char * tcp_state_name(int state)
>     {
> 
>         if (state >= IP_VS_TCP_S_LAST)
> 
>             return "ERR!";
> 
>         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
> 
>     }

But that said, I didn't make up the trace_state_name(), it was already
there in net-next before this patch.

But yeah, in actuality, I would have just done:

#define EM(a)	{ a, #a },
#define EMe(a)	{ a, #a }

directly. Which we can still do.

-- Steve

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-10 15:07             ` Steven Rostedt
@ 2017-11-10 17:37               ` Song Liu
  2017-11-11  2:06               ` Yafang Shao
  1 sibling, 0 replies; 15+ messages in thread
From: Song Liu @ 2017-11-10 17:37 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Yafang Shao, David Miller, Alexei Starovoitov, mingo, netdev,
	linux-kernel


> On Nov 10, 2017, at 7:07 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> On Fri, 10 Nov 2017 12:56:06 +0800
> Yafang Shao <laoar.shao@gmail.com> wrote:
> 
>> Could the macro tcp_state_name() be renamed ?
>> If <trace/events/tcp.h> is included in include/net/tcp.h, it will
> 
> Ideally, you don't want to include trace/events/*.h headers in other
> headers, as they can have side effects if those headers are included in
> other trace/events/*.h headers.
> 
>> cause compile error, because there's another function tcp_state_name()
>> defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
>>    static const char * tcp_state_name(int state)
>>    {
>> 
>>        if (state >= IP_VS_TCP_S_LAST)
>> 
>>            return "ERR!";
>> 
>>        return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
>> 
>>    }
> 
> But that said, I didn't make up the trace_state_name(), it was already
> there in net-next before this patch.
> 
> But yeah, in actuality, I would have just done:
> 
> #define EM(a)	{ a, #a },
> #define EMe(a)	{ a, #a }
> 
> directly. Which we can still do.
> 
> -- Steve
> 

How about we undef tcp_state_name and tcp_event_names at the end of
include/trace/events/tcp.h?

Thanks,
Song

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-10 15:07             ` Steven Rostedt
  2017-11-10 17:37               ` Song Liu
@ 2017-11-11  2:06               ` Yafang Shao
  2017-11-11  3:32                 ` Steven Rostedt
  1 sibling, 1 reply; 15+ messages in thread
From: Yafang Shao @ 2017-11-11  2:06 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: David Miller, Song Liu, Alexei Starovoitov, mingo, netdev, linux-kernel

2017-11-10 15:07 GMT+00:00 Steven Rostedt <rostedt@goodmis.org>:
> On Fri, 10 Nov 2017 12:56:06 +0800
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
>> Could the macro tcp_state_name() be renamed ?
>> If <trace/events/tcp.h> is included in include/net/tcp.h, it will
>
> Ideally, you don't want to include trace/events/*.h headers in other
> headers, as they can have side effects if those headers are included in
> other trace/events/*.h headers.
>

Actually I find trace/events/*.h is included in lots of other headers,
for example,

net/rxrpc/ar-internal.h
include/linux/bpf_trace.h
fs/f2fs/trace.h
fs/afs/internal.h
arch/x86/include/asm/mmu_context.h
...

Are these files doing properly ?
Should we fix them ?

But per my understanding, it is ok to include  trace/events/*.h in
other headers because we defined TRACE_SYSTEM as well, as a
consequence those headers should not included in trace/events/*.h. If
that happens, it may means that one of the these two TRACE_SYSTEM is
not defined properly. Maybe these two TRACE_SYSTEM should be merged to
one TRACE_SYSTEM.


>> cause compile error, because there's another function tcp_state_name()
>> defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
>>     static const char * tcp_state_name(int state)
>>     {
>>
>>         if (state >= IP_VS_TCP_S_LAST)
>>
>>             return "ERR!";
>>
>>         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
>>
>>     }
>
> But that said, I didn't make up the trace_state_name(), it was already
> there in net-next before this patch.
>

I know that is not your fault.
But as you are modifying this file, it is better to modify it in your
patch as well.
So we need not submit another new patch to fix it.

> But yeah, in actuality, I would have just done:
>
> #define EM(a)   { a, #a },
> #define EMe(a)  { a, #a }
>
> directly. Which we can still do.
>
> -- Steve
>

The suggestion from Song is good to fix it.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-11  2:06               ` Yafang Shao
@ 2017-11-11  3:32                 ` Steven Rostedt
  2017-11-11 13:26                   ` Yafang Shao
  0 siblings, 1 reply; 15+ messages in thread
From: Steven Rostedt @ 2017-11-11  3:32 UTC (permalink / raw)
  To: Yafang Shao
  Cc: David Miller, Song Liu, Alexei Starovoitov, mingo, netdev, linux-kernel

On Sat, 11 Nov 2017 02:06:00 +0000
Yafang Shao <laoar.shao@gmail.com> wrote:

> 2017-11-10 15:07 GMT+00:00 Steven Rostedt <rostedt@goodmis.org>:
> > On Fri, 10 Nov 2017 12:56:06 +0800
> > Yafang Shao <laoar.shao@gmail.com> wrote:
> >  
> >> Could the macro tcp_state_name() be renamed ?
> >> If <trace/events/tcp.h> is included in include/net/tcp.h, it will  
> >
> > Ideally, you don't want to include trace/events/*.h headers in other
> > headers, as they can have side effects if those headers are included in
> > other trace/events/*.h headers.
> >  
> 
> Actually I find trace/events/*.h is included in lots of other headers,
> for example,
> 
> net/rxrpc/ar-internal.h

This is an internal header, so it's not that likely to be used where it
shouldn't be.

> include/linux/bpf_trace.h
> fs/f2fs/trace.h

The above two are actually headers specifically used to pull in the
trace/events/*.h headers.

> fs/afs/internal.h

another internal header. Unlikely to be misused.

> arch/x86/include/asm/mmu_context.h

This one, hmm, probably should be fixed.

> ...
> 
> Are these files doing properly ?

Most yes, some probably not.

> Should we fix them ?

Probably, but if they are used incorrectly, it would usually fail on
build (The same global functions and variables would be defined).

> 
> But per my understanding, it is ok to include  trace/events/*.h in
> other headers because we defined TRACE_SYSTEM as well, as a
> consequence those headers should not included in trace/events/*.h. If
> that happens, it may means that one of the these two TRACE_SYSTEM is
> not defined properly. Maybe these two TRACE_SYSTEM should be merged to
> one TRACE_SYSTEM.

Two different files may have the same TRACE_SYSTEM defined. That's not
an issue.

The issue is, if you have a trace/events/*.h header in a popular file
(like it use to be in include/linux/slab.h), then it can cause issues
if another trace/events/*.h header includes it. That's because each
trace/events/*.h header must be included with CREATE_TRACE_POINTS only
once.

> 
> 
> >> cause compile error, because there's another function tcp_state_name()
> >> defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
> >>     static const char * tcp_state_name(int state)
> >>     {
> >>
> >>         if (state >= IP_VS_TCP_S_LAST)
> >>
> >>             return "ERR!";
> >>
> >>         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
> >>
> >>     }  
> >
> > But that said, I didn't make up the trace_state_name(), it was already
> > there in net-next before this patch.
> >  
> 
> I know that is not your fault.

:-)

> But as you are modifying this file, it is better to modify it in your
> patch as well.
> So we need not submit another new patch to fix it.

I could whip up a patch 2.

> 
> > But yeah, in actuality, I would have just done:
> >
> > #define EM(a)   { a, #a },
> > #define EMe(a)  { a, #a }
> >
> > directly. Which we can still do.
> >
> > -- Steve
> >  
> 
> The suggestion from Song is good to fix it.

Song's suggestion seems like it can simple be a patch added on top of
mine. As it is somewhat agnostic to the fix I'm making. That is, it's a
different problem, and thus should be a different patch.

-- Steve

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tcp: Export to userspace the TCP state names for the trace events
  2017-11-11  3:32                 ` Steven Rostedt
@ 2017-11-11 13:26                   ` Yafang Shao
  0 siblings, 0 replies; 15+ messages in thread
From: Yafang Shao @ 2017-11-11 13:26 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: David Miller, Song Liu, Alexei Starovoitov, mingo, netdev, linux-kernel

2017-11-11 3:32 GMT+00:00 Steven Rostedt <rostedt@goodmis.org>:
> On Sat, 11 Nov 2017 02:06:00 +0000
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
>> 2017-11-10 15:07 GMT+00:00 Steven Rostedt <rostedt@goodmis.org>:
>> > On Fri, 10 Nov 2017 12:56:06 +0800
>> > Yafang Shao <laoar.shao@gmail.com> wrote:
>> >
>> >> Could the macro tcp_state_name() be renamed ?
>> >> If <trace/events/tcp.h> is included in include/net/tcp.h, it will
>> >
>> > Ideally, you don't want to include trace/events/*.h headers in other
>> > headers, as they can have side effects if those headers are included in
>> > other trace/events/*.h headers.
>> >
>>
>> Actually I find trace/events/*.h is included in lots of other headers,
>> for example,
>>
>> net/rxrpc/ar-internal.h
>
> This is an internal header, so it's not that likely to be used where it
> shouldn't be.
>
>> include/linux/bpf_trace.h
>> fs/f2fs/trace.h
>
> The above two are actually headers specifically used to pull in the
> trace/events/*.h headers.
>
>> fs/afs/internal.h
>
> another internal header. Unlikely to be misused.
>
>> arch/x86/include/asm/mmu_context.h
>
> This one, hmm, probably should be fixed.
>
>> ...
>>
>> Are these files doing properly ?
>
> Most yes, some probably not.
>
>> Should we fix them ?
>
> Probably, but if they are used incorrectly, it would usually fail on
> build (The same global functions and variables would be defined).
>
>>
>> But per my understanding, it is ok to include  trace/events/*.h in
>> other headers because we defined TRACE_SYSTEM as well, as a
>> consequence those headers should not included in trace/events/*.h. If
>> that happens, it may means that one of the these two TRACE_SYSTEM is
>> not defined properly. Maybe these two TRACE_SYSTEM should be merged to
>> one TRACE_SYSTEM.
>
> Two different files may have the same TRACE_SYSTEM defined. That's not
> an issue.
>
> The issue is, if you have a trace/events/*.h header in a popular file
> (like it use to be in include/linux/slab.h), then it can cause issues
> if another trace/events/*.h header includes it. That's because each
> trace/events/*.h header must be included with CREATE_TRACE_POINTS only
> once.
>

Understood.
Thanks for explanation.

>>
>>
>> >> cause compile error, because there's another function tcp_state_name()
>> >> defined in net/netfilter/ipvs/ip_vs_proto_tcp.c.
>> >>     static const char * tcp_state_name(int state)
>> >>     {
>> >>
>> >>         if (state >= IP_VS_TCP_S_LAST)
>> >>
>> >>             return "ERR!";
>> >>
>> >>         return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
>> >>
>> >>     }
>> >
>> > But that said, I didn't make up the trace_state_name(), it was already
>> > there in net-next before this patch.
>> >
>>
>> I know that is not your fault.
>
> :-)
>
>> But as you are modifying this file, it is better to modify it in your
>> patch as well.
>> So we need not submit another new patch to fix it.
>
> I could whip up a patch 2.
>
>>
>> > But yeah, in actuality, I would have just done:
>> >
>> > #define EM(a)   { a, #a },
>> > #define EMe(a)  { a, #a }
>> >
>> > directly. Which we can still do.
>> >
>> > -- Steve
>> >
>>
>> The suggestion from Song is good to fix it.
>
> Song's suggestion seems like it can simple be a patch added on top of
> mine. As it is somewhat agnostic to the fix I'm making. That is, it's a
> different problem, and thus should be a different patch.
>

Got it. These two issues should be fixed in two different patches :-)

Thanks
Yafang

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-11-11 13:26 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-09  6:01 [PATCH] net/tcp: introduce TRACE_EVENT for TCP/IPv4 state transition Yafang Shao
2017-11-09  6:43 ` Alexei Starovoitov
2017-11-09  6:50   ` Yafang Shao
2017-11-09 18:18   ` Steven Rostedt
2017-11-09 18:34     ` Song Liu
2017-11-09 23:40       ` Song Liu
2017-11-10  0:42         ` Steven Rostedt
2017-11-10  0:57         ` [PATCH] tcp: Export to userspace the TCP state names for the trace events Steven Rostedt
2017-11-10  1:42           ` Song Liu
2017-11-10  4:56           ` Yafang Shao
2017-11-10 15:07             ` Steven Rostedt
2017-11-10 17:37               ` Song Liu
2017-11-11  2:06               ` Yafang Shao
2017-11-11  3:32                 ` Steven Rostedt
2017-11-11 13:26                   ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).