All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31  5:18 ` Roopa Prabhu
  0 siblings, 0 replies; 20+ messages in thread
From: Roopa Prabhu @ 2017-08-31  5:18 UTC (permalink / raw)
  To: davem; +Cc: netdev, nikolay, f.fainelli, andrew, bridge

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This extends bridge fdb table tracepoints to also cover
learned fdb entries in the br_fdb_update path. Note that
unlike other tracepoints I have moved this to when the fdb
is modified because this is in the datapath and can generate
a lot of noise in the trace output. br_fdb_update is also called
from added_by_user context in the NTF_USE case which is already
traced ..hence the !added_by_user check.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
 net/bridge/br_fdb.c           |  5 ++++-
 net/core/net-traces.c         |  1 +
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
index 0f1cde0..1bee3e7 100644
--- a/include/trace/events/bridge.h
+++ b/include/trace/events/bridge.h
@@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
 		  __entry->addr[4], __entry->addr[5], __entry->vid)
 );
 
+TRACE_EVENT(br_fdb_update,
+
+	TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
+		 const unsigned char *addr, u16 vid, bool added_by_user),
+
+	TP_ARGS(br, source, addr, vid, added_by_user),
+
+	TP_STRUCT__entry(
+		__string(br_dev, br->dev->name)
+		__string(dev, source->dev->name)
+		__array(unsigned char, addr, ETH_ALEN)
+		__field(u16, vid)
+		__field(bool, added_by_user)
+	),
+
+	TP_fast_assign(
+		__assign_str(br_dev, br->dev->name);
+		__assign_str(dev, source->dev->name);
+		memcpy(__entry->addr, addr, ETH_ALEN);
+		__entry->vid = vid;
+		__entry->added_by_user = added_by_user;
+	),
+
+	TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
+		  __get_str(br_dev), __get_str(dev), __entry->addr[0],
+		  __entry->addr[1], __entry->addr[2], __entry->addr[3],
+		  __entry->addr[4], __entry->addr[5], __entry->vid,
+		  __entry->added_by_user)
+);
+
+
 #endif /* _TRACE_BRIDGE_H */
 
 /* This part must be outside protection */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index be5e1da..4ea5c8b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -583,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 				fdb->updated = now;
 			if (unlikely(added_by_user))
 				fdb->added_by_user = 1;
-			if (unlikely(fdb_modified))
+			if (unlikely(fdb_modified)) {
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
+			}
 		}
 	} else {
 		spin_lock(&br->hash_lock);
@@ -593,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			if (fdb) {
 				if (unlikely(added_by_user))
 					fdb->added_by_user = 1;
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 			}
 		}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 4a0292c..1132820 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -42,6 +42,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
 #endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31  5:18 ` Roopa Prabhu
  0 siblings, 0 replies; 20+ messages in thread
From: Roopa Prabhu @ 2017-08-31  5:18 UTC (permalink / raw)
  To: davem; +Cc: nikolay, netdev, f.fainelli, bridge, andrew

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This extends bridge fdb table tracepoints to also cover
learned fdb entries in the br_fdb_update path. Note that
unlike other tracepoints I have moved this to when the fdb
is modified because this is in the datapath and can generate
a lot of noise in the trace output. br_fdb_update is also called
from added_by_user context in the NTF_USE case which is already
traced ..hence the !added_by_user check.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
 net/bridge/br_fdb.c           |  5 ++++-
 net/core/net-traces.c         |  1 +
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
index 0f1cde0..1bee3e7 100644
--- a/include/trace/events/bridge.h
+++ b/include/trace/events/bridge.h
@@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
 		  __entry->addr[4], __entry->addr[5], __entry->vid)
 );
 
+TRACE_EVENT(br_fdb_update,
+
+	TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
+		 const unsigned char *addr, u16 vid, bool added_by_user),
+
+	TP_ARGS(br, source, addr, vid, added_by_user),
+
+	TP_STRUCT__entry(
+		__string(br_dev, br->dev->name)
+		__string(dev, source->dev->name)
+		__array(unsigned char, addr, ETH_ALEN)
+		__field(u16, vid)
+		__field(bool, added_by_user)
+	),
+
+	TP_fast_assign(
+		__assign_str(br_dev, br->dev->name);
+		__assign_str(dev, source->dev->name);
+		memcpy(__entry->addr, addr, ETH_ALEN);
+		__entry->vid = vid;
+		__entry->added_by_user = added_by_user;
+	),
+
+	TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
+		  __get_str(br_dev), __get_str(dev), __entry->addr[0],
+		  __entry->addr[1], __entry->addr[2], __entry->addr[3],
+		  __entry->addr[4], __entry->addr[5], __entry->vid,
+		  __entry->added_by_user)
+);
+
+
 #endif /* _TRACE_BRIDGE_H */
 
 /* This part must be outside protection */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index be5e1da..4ea5c8b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -583,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 				fdb->updated = now;
 			if (unlikely(added_by_user))
 				fdb->added_by_user = 1;
-			if (unlikely(fdb_modified))
+			if (unlikely(fdb_modified)) {
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
+			}
 		}
 	} else {
 		spin_lock(&br->hash_lock);
@@ -593,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			if (fdb) {
 				if (unlikely(added_by_user))
 					fdb->added_by_user = 1;
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 			}
 		}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 4a0292c..1132820 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -42,6 +42,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
 EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
 #endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31  5:18 ` [Bridge] " Roopa Prabhu
@ 2017-08-31 12:38   ` Jesper Dangaard Brouer
  -1 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 12:38 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: brouer, davem, netdev, nikolay, f.fainelli, andrew, bridge

On Wed, 30 Aug 2017 22:18:13 -0700
Roopa Prabhu <roopa@cumulusnetworks.com> wrote:

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>  net/bridge/br_fdb.c           |  5 ++++-
>  net/core/net-traces.c         |  1 +
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
> index 0f1cde0..1bee3e7 100644
> --- a/include/trace/events/bridge.h
> +++ b/include/trace/events/bridge.h
> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>  		  __entry->addr[4], __entry->addr[5], __entry->vid)
>  );
>  
> +TRACE_EVENT(br_fdb_update,
> +
> +	TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
> +		 const unsigned char *addr, u16 vid, bool added_by_user),
> +
> +	TP_ARGS(br, source, addr, vid, added_by_user),
> +
> +	TP_STRUCT__entry(
> +		__string(br_dev, br->dev->name)
> +		__string(dev, source->dev->name)

I have found that using the device string name is 

(1) slow as it involves strcpy+strlen

 See [1]+[2] where a single dev-name costed me 16 ns, and the base
 overhead of a bpf attached tracepoint is 25 ns (see [3]).

 [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
 [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
 [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64

(2) strings are also harder to work-with/extract when attaching a bpf_prog

See the trouble I'm in accessing a dev string here napi:napi_poll here:
 https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58

Using ifindex'es in userspace is fairly easy see man if_indextoname(3).

> +		__array(unsigned char, addr, ETH_ALEN)
> +		__field(u16, vid)
> +		__field(bool, added_by_user)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(br_dev, br->dev->name);
> +		__assign_str(dev, source->dev->name);
> +		memcpy(__entry->addr, addr, ETH_ALEN);
> +		__entry->vid = vid;
> +		__entry->added_by_user = added_by_user;
> +	),
> +
> +	TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
> +		  __get_str(br_dev), __get_str(dev), __entry->addr[0],
> +		  __entry->addr[1], __entry->addr[2], __entry->addr[3],
> +		  __entry->addr[4], __entry->addr[5], __entry->vid,
> +		  __entry->added_by_user)
> +);

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 12:38   ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 12:38 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: andrew, f.fainelli, nikolay, netdev, bridge, brouer, davem

On Wed, 30 Aug 2017 22:18:13 -0700
Roopa Prabhu <roopa@cumulusnetworks.com> wrote:

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>  net/bridge/br_fdb.c           |  5 ++++-
>  net/core/net-traces.c         |  1 +
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 
> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
> index 0f1cde0..1bee3e7 100644
> --- a/include/trace/events/bridge.h
> +++ b/include/trace/events/bridge.h
> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>  		  __entry->addr[4], __entry->addr[5], __entry->vid)
>  );
>  
> +TRACE_EVENT(br_fdb_update,
> +
> +	TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
> +		 const unsigned char *addr, u16 vid, bool added_by_user),
> +
> +	TP_ARGS(br, source, addr, vid, added_by_user),
> +
> +	TP_STRUCT__entry(
> +		__string(br_dev, br->dev->name)
> +		__string(dev, source->dev->name)

I have found that using the device string name is 

(1) slow as it involves strcpy+strlen

 See [1]+[2] where a single dev-name costed me 16 ns, and the base
 overhead of a bpf attached tracepoint is 25 ns (see [3]).

 [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
 [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
 [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64

(2) strings are also harder to work-with/extract when attaching a bpf_prog

See the trouble I'm in accessing a dev string here napi:napi_poll here:
 https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58

Using ifindex'es in userspace is fairly easy see man if_indextoname(3).

> +		__array(unsigned char, addr, ETH_ALEN)
> +		__field(u16, vid)
> +		__field(bool, added_by_user)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(br_dev, br->dev->name);
> +		__assign_str(dev, source->dev->name);
> +		memcpy(__entry->addr, addr, ETH_ALEN);
> +		__entry->vid = vid;
> +		__entry->added_by_user = added_by_user;
> +	),
> +
> +	TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
> +		  __get_str(br_dev), __get_str(dev), __entry->addr[0],
> +		  __entry->addr[1], __entry->addr[2], __entry->addr[3],
> +		  __entry->addr[4], __entry->addr[5], __entry->vid,
> +		  __entry->added_by_user)
> +);

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31  5:18 ` [Bridge] " Roopa Prabhu
@ 2017-08-31 14:19   ` Nikolay Aleksandrov
  -1 siblings, 0 replies; 20+ messages in thread
From: Nikolay Aleksandrov @ 2017-08-31 14:19 UTC (permalink / raw)
  To: Roopa Prabhu, davem; +Cc: netdev, f.fainelli, andrew, bridge

On 31/08/17 08:18, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>  net/bridge/br_fdb.c           |  5 ++++-
>  net/core/net-traces.c         |  1 +
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 

Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 14:19   ` Nikolay Aleksandrov
  0 siblings, 0 replies; 20+ messages in thread
From: Nikolay Aleksandrov @ 2017-08-31 14:19 UTC (permalink / raw)
  To: Roopa Prabhu, davem; +Cc: bridge, netdev, f.fainelli, andrew

On 31/08/17 08:18, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>  net/bridge/br_fdb.c           |  5 ++++-
>  net/core/net-traces.c         |  1 +
>  3 files changed, 36 insertions(+), 1 deletion(-)
> 

Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 12:38   ` [Bridge] " Jesper Dangaard Brouer
@ 2017-08-31 15:21     ` Roopa Prabhu
  -1 siblings, 0 replies; 20+ messages in thread
From: Roopa Prabhu @ 2017-08-31 15:21 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: davem, netdev, Nikolay Aleksandrov, Florian Fainelli,
	Andrew Lunn, bridge

On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> On Wed, 30 Aug 2017 22:18:13 -0700
> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This extends bridge fdb table tracepoints to also cover
>> learned fdb entries in the br_fdb_update path. Note that
>> unlike other tracepoints I have moved this to when the fdb
>> is modified because this is in the datapath and can generate
>> a lot of noise in the trace output. br_fdb_update is also called
>> from added_by_user context in the NTF_USE case which is already
>> traced ..hence the !added_by_user check.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>>  net/bridge/br_fdb.c           |  5 ++++-
>>  net/core/net-traces.c         |  1 +
>>  3 files changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
>> index 0f1cde0..1bee3e7 100644
>> --- a/include/trace/events/bridge.h
>> +++ b/include/trace/events/bridge.h
>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
>>  );
>>
>> +TRACE_EVENT(br_fdb_update,
>> +
>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
>> +              const unsigned char *addr, u16 vid, bool added_by_user),
>> +
>> +     TP_ARGS(br, source, addr, vid, added_by_user),
>> +
>> +     TP_STRUCT__entry(
>> +             __string(br_dev, br->dev->name)
>> +             __string(dev, source->dev->name)
>
> I have found that using the device string name is
>
> (1) slow as it involves strcpy+strlen
>
>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
>
>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
>
> (2) strings are also harder to work-with/extract when attaching a bpf_prog
>
> See the trouble I'm in accessing a dev string here napi:napi_poll here:
>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
>
> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
>

Jesper thanks for the data!. GTK. Looking at include/trace/events,
currently almost all tracepoints use dev->name.
These bridge tracepoints in context are primarily for debugging fdb
updates only, not for every packet and hence not in the performance
path.
In large scale deployments with thousands of bridge ports and fdb
entries, dev->name will definately make it easier to trouble-shoot.
So, I did like to leave these with dev->name unless there are strong objections.

thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 15:21     ` Roopa Prabhu
  0 siblings, 0 replies; 20+ messages in thread
From: Roopa Prabhu @ 2017-08-31 15:21 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Andrew Lunn, Florian Fainelli, Nikolay Aleksandrov, netdev,
	bridge, davem

On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> On Wed, 30 Aug 2017 22:18:13 -0700
> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This extends bridge fdb table tracepoints to also cover
>> learned fdb entries in the br_fdb_update path. Note that
>> unlike other tracepoints I have moved this to when the fdb
>> is modified because this is in the datapath and can generate
>> a lot of noise in the trace output. br_fdb_update is also called
>> from added_by_user context in the NTF_USE case which is already
>> traced ..hence the !added_by_user check.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>>  net/bridge/br_fdb.c           |  5 ++++-
>>  net/core/net-traces.c         |  1 +
>>  3 files changed, 36 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
>> index 0f1cde0..1bee3e7 100644
>> --- a/include/trace/events/bridge.h
>> +++ b/include/trace/events/bridge.h
>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
>>  );
>>
>> +TRACE_EVENT(br_fdb_update,
>> +
>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
>> +              const unsigned char *addr, u16 vid, bool added_by_user),
>> +
>> +     TP_ARGS(br, source, addr, vid, added_by_user),
>> +
>> +     TP_STRUCT__entry(
>> +             __string(br_dev, br->dev->name)
>> +             __string(dev, source->dev->name)
>
> I have found that using the device string name is
>
> (1) slow as it involves strcpy+strlen
>
>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
>
>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
>
> (2) strings are also harder to work-with/extract when attaching a bpf_prog
>
> See the trouble I'm in accessing a dev string here napi:napi_poll here:
>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
>
> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
>

Jesper thanks for the data!. GTK. Looking at include/trace/events,
currently almost all tracepoints use dev->name.
These bridge tracepoints in context are primarily for debugging fdb
updates only, not for every packet and hence not in the performance
path.
In large scale deployments with thousands of bridge ports and fdb
entries, dev->name will definately make it easier to trouble-shoot.
So, I did like to leave these with dev->name unless there are strong objections.

thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 15:21     ` [Bridge] " Roopa Prabhu
@ 2017-08-31 15:30       ` David Ahern
  -1 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2017-08-31 15:30 UTC (permalink / raw)
  To: Roopa Prabhu, Jesper Dangaard Brouer
  Cc: davem, netdev, Nikolay Aleksandrov, Florian Fainelli,
	Andrew Lunn, bridge

On 8/31/17 9:21 AM, Roopa Prabhu wrote:
> On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
> <brouer@redhat.com> wrote:
>> On Wed, 30 Aug 2017 22:18:13 -0700
>> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
>>
>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>
>>> This extends bridge fdb table tracepoints to also cover
>>> learned fdb entries in the br_fdb_update path. Note that
>>> unlike other tracepoints I have moved this to when the fdb
>>> is modified because this is in the datapath and can generate
>>> a lot of noise in the trace output. br_fdb_update is also called
>>> from added_by_user context in the NTF_USE case which is already
>>> traced ..hence the !added_by_user check.
>>>
>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>> ---
>>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>>>  net/bridge/br_fdb.c           |  5 ++++-
>>>  net/core/net-traces.c         |  1 +
>>>  3 files changed, 36 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
>>> index 0f1cde0..1bee3e7 100644
>>> --- a/include/trace/events/bridge.h
>>> +++ b/include/trace/events/bridge.h
>>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
>>>  );
>>>
>>> +TRACE_EVENT(br_fdb_update,
>>> +
>>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
>>> +              const unsigned char *addr, u16 vid, bool added_by_user),
>>> +
>>> +     TP_ARGS(br, source, addr, vid, added_by_user),
>>> +
>>> +     TP_STRUCT__entry(
>>> +             __string(br_dev, br->dev->name)
>>> +             __string(dev, source->dev->name)
>>
>> I have found that using the device string name is
>>
>> (1) slow as it involves strcpy+strlen
>>
>>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
>>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
>>
>>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
>>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
>>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
>>
>> (2) strings are also harder to work-with/extract when attaching a bpf_prog
>>
>> See the trouble I'm in accessing a dev string here napi:napi_poll here:
>>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
>>
>> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
>>
> 
> Jesper thanks for the data!. GTK. Looking at include/trace/events,
> currently almost all tracepoints use dev->name.
> These bridge tracepoints in context are primarily for debugging fdb
> updates only, not for every packet and hence not in the performance
> path.
> In large scale deployments with thousands of bridge ports and fdb
> entries, dev->name will definately make it easier to trouble-shoot.
> So, I did like to leave these with dev->name unless there are strong objections.

+1 for user friendliness for debugging tracepoints. The device name is
also more user friendly when adding filters to the data collection.

Being able to add bpf everywhere certainly changes the game a bit, but
we should not relinquish ease of use and understanding for the potential
that someone might want to put a bpf program on the tracepoint and want
to maintain high performance.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 15:30       ` David Ahern
  0 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2017-08-31 15:30 UTC (permalink / raw)
  To: Roopa Prabhu, Jesper Dangaard Brouer
  Cc: Andrew Lunn, Florian Fainelli, Nikolay Aleksandrov, netdev,
	bridge, davem

On 8/31/17 9:21 AM, Roopa Prabhu wrote:
> On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
> <brouer@redhat.com> wrote:
>> On Wed, 30 Aug 2017 22:18:13 -0700
>> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
>>
>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>
>>> This extends bridge fdb table tracepoints to also cover
>>> learned fdb entries in the br_fdb_update path. Note that
>>> unlike other tracepoints I have moved this to when the fdb
>>> is modified because this is in the datapath and can generate
>>> a lot of noise in the trace output. br_fdb_update is also called
>>> from added_by_user context in the NTF_USE case which is already
>>> traced ..hence the !added_by_user check.
>>>
>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>> ---
>>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
>>>  net/bridge/br_fdb.c           |  5 ++++-
>>>  net/core/net-traces.c         |  1 +
>>>  3 files changed, 36 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
>>> index 0f1cde0..1bee3e7 100644
>>> --- a/include/trace/events/bridge.h
>>> +++ b/include/trace/events/bridge.h
>>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
>>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
>>>  );
>>>
>>> +TRACE_EVENT(br_fdb_update,
>>> +
>>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
>>> +              const unsigned char *addr, u16 vid, bool added_by_user),
>>> +
>>> +     TP_ARGS(br, source, addr, vid, added_by_user),
>>> +
>>> +     TP_STRUCT__entry(
>>> +             __string(br_dev, br->dev->name)
>>> +             __string(dev, source->dev->name)
>>
>> I have found that using the device string name is
>>
>> (1) slow as it involves strcpy+strlen
>>
>>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
>>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
>>
>>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
>>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
>>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
>>
>> (2) strings are also harder to work-with/extract when attaching a bpf_prog
>>
>> See the trouble I'm in accessing a dev string here napi:napi_poll here:
>>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
>>
>> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
>>
> 
> Jesper thanks for the data!. GTK. Looking at include/trace/events,
> currently almost all tracepoints use dev->name.
> These bridge tracepoints in context are primarily for debugging fdb
> updates only, not for every packet and hence not in the performance
> path.
> In large scale deployments with thousands of bridge ports and fdb
> entries, dev->name will definately make it easier to trouble-shoot.
> So, I did like to leave these with dev->name unless there are strong objections.

+1 for user friendliness for debugging tracepoints. The device name is
also more user friendly when adding filters to the data collection.

Being able to add bpf everywhere certainly changes the game a bit, but
we should not relinquish ease of use and understanding for the potential
that someone might want to put a bpf program on the tracepoint and want
to maintain high performance.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 15:30       ` [Bridge] " David Ahern
@ 2017-08-31 16:20         ` Jesper Dangaard Brouer
  -1 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 16:20 UTC (permalink / raw)
  To: David Ahern
  Cc: Roopa Prabhu, davem, netdev, Nikolay Aleksandrov,
	Florian Fainelli, Andrew Lunn, bridge, brouer,
	Arnaldo Carvalho de Melo, Peter Zijlstra

On Thu, 31 Aug 2017 09:30:05 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 8/31/17 9:21 AM, Roopa Prabhu wrote:
> > On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
> > <brouer@redhat.com> wrote:  
> >> On Wed, 30 Aug 2017 22:18:13 -0700
> >> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
> >>  
> >>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>>
> >>> This extends bridge fdb table tracepoints to also cover
> >>> learned fdb entries in the br_fdb_update path. Note that
> >>> unlike other tracepoints I have moved this to when the fdb
> >>> is modified because this is in the datapath and can generate
> >>> a lot of noise in the trace output. br_fdb_update is also called
> >>> from added_by_user context in the NTF_USE case which is already
> >>> traced ..hence the !added_by_user check.
> >>>
> >>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>> ---
> >>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
> >>>  net/bridge/br_fdb.c           |  5 ++++-
> >>>  net/core/net-traces.c         |  1 +
> >>>  3 files changed, 36 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
> >>> index 0f1cde0..1bee3e7 100644
> >>> --- a/include/trace/events/bridge.h
> >>> +++ b/include/trace/events/bridge.h
> >>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
> >>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
> >>>  );
> >>>
> >>> +TRACE_EVENT(br_fdb_update,
> >>> +
> >>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
> >>> +              const unsigned char *addr, u16 vid, bool added_by_user),
> >>> +
> >>> +     TP_ARGS(br, source, addr, vid, added_by_user),
> >>> +
> >>> +     TP_STRUCT__entry(
> >>> +             __string(br_dev, br->dev->name)
> >>> +             __string(dev, source->dev->name)  
> >>
> >> I have found that using the device string name is
> >>
> >> (1) slow as it involves strcpy+strlen
> >>
> >>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
> >>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
> >>
> >>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
> >>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
> >>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
> >>
> >> (2) strings are also harder to work-with/extract when attaching a bpf_prog
> >>
> >> See the trouble I'm in accessing a dev string here napi:napi_poll here:
> >>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
> >>
> >> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
> >>  
> > 
> > Jesper thanks for the data!. GTK. Looking at include/trace/events,
> > currently almost all tracepoints use dev->name.

True, but with my recent experience and benchmarking, I consider this
generally a bad choice we have made for all these tracepoints.  In your
case with 2 strings, 2x16=32ns, you basically introduced a overhead
that is larger that to invocation cost.

> > These bridge tracepoints in context are primarily for debugging fdb
> > updates only, not for every packet and hence not in the performance
> > path.
> > In large scale deployments with thousands of bridge ports and fdb
> > entries, dev->name will definately make it easier to trouble-shoot.
> > So, I did like to leave these with dev->name unless there are strong objections.  
> 
> +1 for user friendliness for debugging tracepoints. The device name is
> also more user friendly when adding filters to the data collection.
>
> Being able to add bpf everywhere certainly changes the game a bit, but
> we should not relinquish ease of use and understanding for the potential
> that someone might want to put a bpf program on the tracepoint and want
> to maintain high performance.

(Cc. Acme and Peterz)
I wonder if we can create a special perf-tracepoint type for ifindex'es
and the tool reading (e.g. perf-script) can perform the name lookup in
userspace (calling if_indextoname(3)) ?

I don't know the perf tools well enough to know if this is possible?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 16:20         ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 16:20 UTC (permalink / raw)
  To: David Ahern
  Cc: Andrew Lunn, Florian Fainelli, Nikolay Aleksandrov, netdev,
	Roopa Prabhu, bridge, Arnaldo Carvalho de Melo, Peter Zijlstra,
	brouer, davem

On Thu, 31 Aug 2017 09:30:05 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 8/31/17 9:21 AM, Roopa Prabhu wrote:
> > On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer
> > <brouer@redhat.com> wrote:  
> >> On Wed, 30 Aug 2017 22:18:13 -0700
> >> Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
> >>  
> >>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>>
> >>> This extends bridge fdb table tracepoints to also cover
> >>> learned fdb entries in the br_fdb_update path. Note that
> >>> unlike other tracepoints I have moved this to when the fdb
> >>> is modified because this is in the datapath and can generate
> >>> a lot of noise in the trace output. br_fdb_update is also called
> >>> from added_by_user context in the NTF_USE case which is already
> >>> traced ..hence the !added_by_user check.
> >>>
> >>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>> ---
> >>>  include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++
> >>>  net/bridge/br_fdb.c           |  5 ++++-
> >>>  net/core/net-traces.c         |  1 +
> >>>  3 files changed, 36 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
> >>> index 0f1cde0..1bee3e7 100644
> >>> --- a/include/trace/events/bridge.h
> >>> +++ b/include/trace/events/bridge.h
> >>> @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete,
> >>>                 __entry->addr[4], __entry->addr[5], __entry->vid)
> >>>  );
> >>>
> >>> +TRACE_EVENT(br_fdb_update,
> >>> +
> >>> +     TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
> >>> +              const unsigned char *addr, u16 vid, bool added_by_user),
> >>> +
> >>> +     TP_ARGS(br, source, addr, vid, added_by_user),
> >>> +
> >>> +     TP_STRUCT__entry(
> >>> +             __string(br_dev, br->dev->name)
> >>> +             __string(dev, source->dev->name)  
> >>
> >> I have found that using the device string name is
> >>
> >> (1) slow as it involves strcpy+strlen
> >>
> >>  See [1]+[2] where a single dev-name costed me 16 ns, and the base
> >>  overhead of a bpf attached tracepoint is 25 ns (see [3]).
> >>
> >>  [1] https://git.kernel.org/davem/net-next/c/e7d12ce121a
> >>  [2] https://git.kernel.org/davem/net-next/c/315ec3990ef
> >>  [3] https://git.kernel.org/davem/net-next/c/25d4dae1a64
> >>
> >> (2) strings are also harder to work-with/extract when attaching a bpf_prog
> >>
> >> See the trouble I'm in accessing a dev string here napi:napi_poll here:
> >>  https://github.com/netoptimizer/prototype-kernel/blob/103b955a080/kernel/samples/bpf/napi_monitor_kern.c#L52-L58
> >>
> >> Using ifindex'es in userspace is fairly easy see man if_indextoname(3).
> >>  
> > 
> > Jesper thanks for the data!. GTK. Looking at include/trace/events,
> > currently almost all tracepoints use dev->name.

True, but with my recent experience and benchmarking, I consider this
generally a bad choice we have made for all these tracepoints.  In your
case with 2 strings, 2x16=32ns, you basically introduced a overhead
that is larger that to invocation cost.

> > These bridge tracepoints in context are primarily for debugging fdb
> > updates only, not for every packet and hence not in the performance
> > path.
> > In large scale deployments with thousands of bridge ports and fdb
> > entries, dev->name will definately make it easier to trouble-shoot.
> > So, I did like to leave these with dev->name unless there are strong objections.  
> 
> +1 for user friendliness for debugging tracepoints. The device name is
> also more user friendly when adding filters to the data collection.
>
> Being able to add bpf everywhere certainly changes the game a bit, but
> we should not relinquish ease of use and understanding for the potential
> that someone might want to put a bpf program on the tracepoint and want
> to maintain high performance.

(Cc. Acme and Peterz)
I wonder if we can create a special perf-tracepoint type for ifindex'es
and the tool reading (e.g. perf-script) can perform the name lookup in
userspace (calling if_indextoname(3)) ?

I don't know the perf tools well enough to know if this is possible?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31  5:18 ` [Bridge] " Roopa Prabhu
@ 2017-08-31 18:43   ` David Miller
  -1 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-08-31 18:43 UTC (permalink / raw)
  To: roopa; +Cc: netdev, nikolay, f.fainelli, andrew, bridge

From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 30 Aug 2017 22:18:13 -0700

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>

Applied.

Let's use dev->name for now and if the tooling can eventually
do transparent ifindex->name then we can consider redoing
a bunch of networking tracepoints.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 18:43   ` David Miller
  0 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-08-31 18:43 UTC (permalink / raw)
  To: roopa; +Cc: nikolay, netdev, f.fainelli, bridge, andrew

From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 30 Aug 2017 22:18:13 -0700

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> This extends bridge fdb table tracepoints to also cover
> learned fdb entries in the br_fdb_update path. Note that
> unlike other tracepoints I have moved this to when the fdb
> is modified because this is in the datapath and can generate
> a lot of noise in the trace output. br_fdb_update is also called
> from added_by_user context in the NTF_USE case which is already
> traced ..hence the !added_by_user check.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>

Applied.

Let's use dev->name for now and if the tooling can eventually
do transparent ifindex->name then we can consider redoing
a bunch of networking tracepoints.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 16:20         ` [Bridge] " Jesper Dangaard Brouer
@ 2017-08-31 18:57           ` Arnaldo Carvalho de Melo
  -1 siblings, 0 replies; 20+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-08-31 18:56 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David Ahern, Roopa Prabhu, davem, netdev, Nikolay Aleksandrov,
	Florian Fainelli, Andrew Lunn, bridge, Peter Zijlstra

Em Thu, Aug 31, 2017 at 06:20:12PM +0200, Jesper Dangaard Brouer escreveu:
> On Thu, 31 Aug 2017 09:30:05 -0600 David Ahern <dsahern@gmail.com> wrote:
> > > On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer <brouer@redhat.com> wrote:  
> > > These bridge tracepoints in context are primarily for debugging fdb
> > > updates only, not for every packet and hence not in the performance
> > > path.
> > > In large scale deployments with thousands of bridge ports and fdb
> > > entries, dev->name will definately make it easier to trouble-shoot.
> > > So, I did like to leave these with dev->name unless there are strong objections.  

> > +1 for user friendliness for debugging tracepoints. The device name is
> > also more user friendly when adding filters to the data collection.

> > Being able to add bpf everywhere certainly changes the game a bit, but
> > we should not relinquish ease of use and understanding for the potential
> > that someone might want to put a bpf program on the tracepoint and want
> > to maintain high performance.
 
> (Cc. Acme and Peterz)
> I wonder if we can create a special perf-tracepoint type for ifindex'es
> and the tool reading (e.g. perf-script) can perform the name lookup in
> userspace (calling if_indextoname(3)) ?
 
> I don't know the perf tools well enough to know if this is possible?

Yeah, there are libtraceevent plugins, and that gets used by trace-cmd
and perf script, perf trace.

[root@jouet ~]# ls -la ~/.traceevent/plugins/
total 192
drwxr-xr-x. 2 acme acme  4096 Aug 31 15:29 .
drwxr-xr-x. 3 acme acme  4096 Jan 27  2017 ..
-rwxr-xr-x. 1 acme acme 13744 Aug 31 15:29 plugin_cfg80211.so
-rwxr-xr-x. 1 acme acme 20192 Aug 31 15:29 plugin_function.so
-rwxr-xr-x. 1 acme acme 13680 Aug 31 15:29 plugin_hrtimer.so
-rwxr-xr-x. 1 acme acme 13760 Aug 31 15:29 plugin_jbd2.so
-rwxr-xr-x. 1 acme acme 13704 Aug 31 15:29 plugin_kmem.so
-rwxr-xr-x. 1 acme acme 28568 Aug 31 15:29 plugin_kvm.so
-rwxr-xr-x. 1 acme acme 14184 Aug 31 15:29 plugin_mac80211.so
-rwxr-xr-x. 1 acme acme 14424 Aug 31 15:29 plugin_sched_switch.so
-rwxr-xr-x. 1 acme acme 20136 Aug 31 15:29 plugin_scsi.so
-rwxr-xr-x. 1 acme acme 14504 Aug 31 15:29 plugin_xen.so
[root@jouet ~]#

But... that index is something that is mutable, i.e. you'd have to
somehow record all the assignments of an index to an interface and then,
when processing the events, get from that state the mapping you want.

So you don't store the device name by doing lookups at each of those
high volume tracepoints, store just the index, but then, when
establishing the mapping, collect that as well and we come up with some
infrastructure to get that mapping in a place where a plugin can do the
lookups at post processing time.

For instance, the hrtimer plugin will get an address from the kernel,
and, from a state recorded at the same time as the trace file, will
lookup elf symbol tables for the kernel or modules and resolve that
symbol, etc.

- Arnaldo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 18:57           ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 20+ messages in thread
From: Arnaldo Carvalho de Melo @ 2017-08-31 18:57 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Andrew Lunn, Florian Fainelli, Nikolay Aleksandrov, netdev,
	Roopa Prabhu, bridge, Peter Zijlstra, David Ahern, davem

Em Thu, Aug 31, 2017 at 06:20:12PM +0200, Jesper Dangaard Brouer escreveu:
> On Thu, 31 Aug 2017 09:30:05 -0600 David Ahern <dsahern@gmail.com> wrote:
> > > On Thu, Aug 31, 2017 at 5:38 AM, Jesper Dangaard Brouer <brouer@redhat.com> wrote:  
> > > These bridge tracepoints in context are primarily for debugging fdb
> > > updates only, not for every packet and hence not in the performance
> > > path.
> > > In large scale deployments with thousands of bridge ports and fdb
> > > entries, dev->name will definately make it easier to trouble-shoot.
> > > So, I did like to leave these with dev->name unless there are strong objections.  

> > +1 for user friendliness for debugging tracepoints. The device name is
> > also more user friendly when adding filters to the data collection.

> > Being able to add bpf everywhere certainly changes the game a bit, but
> > we should not relinquish ease of use and understanding for the potential
> > that someone might want to put a bpf program on the tracepoint and want
> > to maintain high performance.
 
> (Cc. Acme and Peterz)
> I wonder if we can create a special perf-tracepoint type for ifindex'es
> and the tool reading (e.g. perf-script) can perform the name lookup in
> userspace (calling if_indextoname(3)) ?
 
> I don't know the perf tools well enough to know if this is possible?

Yeah, there are libtraceevent plugins, and that gets used by trace-cmd
and perf script, perf trace.

[root@jouet ~]# ls -la ~/.traceevent/plugins/
total 192
drwxr-xr-x. 2 acme acme  4096 Aug 31 15:29 .
drwxr-xr-x. 3 acme acme  4096 Jan 27  2017 ..
-rwxr-xr-x. 1 acme acme 13744 Aug 31 15:29 plugin_cfg80211.so
-rwxr-xr-x. 1 acme acme 20192 Aug 31 15:29 plugin_function.so
-rwxr-xr-x. 1 acme acme 13680 Aug 31 15:29 plugin_hrtimer.so
-rwxr-xr-x. 1 acme acme 13760 Aug 31 15:29 plugin_jbd2.so
-rwxr-xr-x. 1 acme acme 13704 Aug 31 15:29 plugin_kmem.so
-rwxr-xr-x. 1 acme acme 28568 Aug 31 15:29 plugin_kvm.so
-rwxr-xr-x. 1 acme acme 14184 Aug 31 15:29 plugin_mac80211.so
-rwxr-xr-x. 1 acme acme 14424 Aug 31 15:29 plugin_sched_switch.so
-rwxr-xr-x. 1 acme acme 20136 Aug 31 15:29 plugin_scsi.so
-rwxr-xr-x. 1 acme acme 14504 Aug 31 15:29 plugin_xen.so
[root@jouet ~]#

But... that index is something that is mutable, i.e. you'd have to
somehow record all the assignments of an index to an interface and then,
when processing the events, get from that state the mapping you want.

So you don't store the device name by doing lookups at each of those
high volume tracepoints, store just the index, but then, when
establishing the mapping, collect that as well and we come up with some
infrastructure to get that mapping in a place where a plugin can do the
lookups at post processing time.

For instance, the hrtimer plugin will get an address from the kernel,
and, from a state recorded at the same time as the trace file, will
lookup elf symbol tables for the kernel or modules and resolve that
symbol, etc.

- Arnaldo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 18:43   ` [Bridge] " David Miller
@ 2017-08-31 21:50     ` Jesper Dangaard Brouer
  -1 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 21:50 UTC (permalink / raw)
  To: David Miller; +Cc: brouer, roopa, netdev, nikolay, f.fainelli, andrew, bridge

On Thu, 31 Aug 2017 11:43:25 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> Date: Wed, 30 Aug 2017 22:18:13 -0700
> 
> > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > 
> > This extends bridge fdb table tracepoints to also cover
> > learned fdb entries in the br_fdb_update path. Note that
> > unlike other tracepoints I have moved this to when the fdb
> > is modified because this is in the datapath and can generate
> > a lot of noise in the trace output. br_fdb_update is also called
> > from added_by_user context in the NTF_USE case which is already
> > traced ..hence the !added_by_user check.
> > 
> > Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>  
> 
> Applied.
> 
> Let's use dev->name for now and if the tooling can eventually
> do transparent ifindex->name then we can consider redoing
> a bunch of networking tracepoints.

I agree! :-)

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 21:50     ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2017-08-31 21:50 UTC (permalink / raw)
  To: David Miller; +Cc: andrew, f.fainelli, nikolay, netdev, roopa, bridge, brouer

On Thu, 31 Aug 2017 11:43:25 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> Date: Wed, 30 Aug 2017 22:18:13 -0700
> 
> > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > 
> > This extends bridge fdb table tracepoints to also cover
> > learned fdb entries in the br_fdb_update path. Note that
> > unlike other tracepoints I have moved this to when the fdb
> > is modified because this is in the datapath and can generate
> > a lot of noise in the trace output. br_fdb_update is also called
> > from added_by_user context in the NTF_USE case which is already
> > traced ..hence the !added_by_user check.
> > 
> > Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>  
> 
> Applied.
> 
> Let's use dev->name for now and if the tooling can eventually
> do transparent ifindex->name then we can consider redoing
> a bunch of networking tracepoints.

I agree! :-)

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH net-next] bridge: add tracepoint in br_fdb_update
  2017-08-31 21:50     ` [Bridge] " Jesper Dangaard Brouer
@ 2017-08-31 22:43       ` Stephen Hemminger
  -1 siblings, 0 replies; 20+ messages in thread
From: Stephen Hemminger @ 2017-08-31 22:43 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David Miller, roopa, netdev, nikolay, f.fainelli, andrew, bridge

On Thu, 31 Aug 2017 23:50:26 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> On Thu, 31 Aug 2017 11:43:25 -0700 (PDT)
> David Miller <davem@davemloft.net> wrote:
> 
> > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > Date: Wed, 30 Aug 2017 22:18:13 -0700
> >   
> > > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > > 
> > > This extends bridge fdb table tracepoints to also cover
> > > learned fdb entries in the br_fdb_update path. Note that
> > > unlike other tracepoints I have moved this to when the fdb
> > > is modified because this is in the datapath and can generate
> > > a lot of noise in the trace output. br_fdb_update is also called
> > > from added_by_user context in the NTF_USE case which is already
> > > traced ..hence the !added_by_user check.
> > > 
> > > Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>    
> > 
> > Applied.
> > 
> > Let's use dev->name for now and if the tooling can eventually
> > do transparent ifindex->name then we can consider redoing
> > a bunch of networking tracepoints.  
> 
> I agree! :-)
> 

Agreed, but it is yet another case of tracepoints not having
a stable ABI. There is no ABI guarantee on tracing, but it still
makes for user complaints.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Bridge] [PATCH net-next] bridge: add tracepoint in br_fdb_update
@ 2017-08-31 22:43       ` Stephen Hemminger
  0 siblings, 0 replies; 20+ messages in thread
From: Stephen Hemminger @ 2017-08-31 22:43 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: andrew, f.fainelli, nikolay, netdev, roopa, bridge, David Miller

On Thu, 31 Aug 2017 23:50:26 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> On Thu, 31 Aug 2017 11:43:25 -0700 (PDT)
> David Miller <davem@davemloft.net> wrote:
> 
> > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > Date: Wed, 30 Aug 2017 22:18:13 -0700
> >   
> > > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> > > 
> > > This extends bridge fdb table tracepoints to also cover
> > > learned fdb entries in the br_fdb_update path. Note that
> > > unlike other tracepoints I have moved this to when the fdb
> > > is modified because this is in the datapath and can generate
> > > a lot of noise in the trace output. br_fdb_update is also called
> > > from added_by_user context in the NTF_USE case which is already
> > > traced ..hence the !added_by_user check.
> > > 
> > > Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>    
> > 
> > Applied.
> > 
> > Let's use dev->name for now and if the tooling can eventually
> > do transparent ifindex->name then we can consider redoing
> > a bunch of networking tracepoints.  
> 
> I agree! :-)
> 

Agreed, but it is yet another case of tracepoints not having
a stable ABI. There is no ABI guarantee on tracing, but it still
makes for user complaints.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-08-31 22:43 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-31  5:18 [PATCH net-next] bridge: add tracepoint in br_fdb_update Roopa Prabhu
2017-08-31  5:18 ` [Bridge] " Roopa Prabhu
2017-08-31 12:38 ` Jesper Dangaard Brouer
2017-08-31 12:38   ` [Bridge] " Jesper Dangaard Brouer
2017-08-31 15:21   ` Roopa Prabhu
2017-08-31 15:21     ` [Bridge] " Roopa Prabhu
2017-08-31 15:30     ` David Ahern
2017-08-31 15:30       ` [Bridge] " David Ahern
2017-08-31 16:20       ` Jesper Dangaard Brouer
2017-08-31 16:20         ` [Bridge] " Jesper Dangaard Brouer
2017-08-31 18:56         ` Arnaldo Carvalho de Melo
2017-08-31 18:57           ` [Bridge] " Arnaldo Carvalho de Melo
2017-08-31 14:19 ` Nikolay Aleksandrov
2017-08-31 14:19   ` [Bridge] " Nikolay Aleksandrov
2017-08-31 18:43 ` David Miller
2017-08-31 18:43   ` [Bridge] " David Miller
2017-08-31 21:50   ` Jesper Dangaard Brouer
2017-08-31 21:50     ` [Bridge] " Jesper Dangaard Brouer
2017-08-31 22:43     ` Stephen Hemminger
2017-08-31 22:43       ` [Bridge] " Stephen Hemminger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.