bridge.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 net-next 0/4] net: switchdev: Tracepoints
@ 2024-02-23 11:44 Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 1/4] net: switchdev: Wrap enums in mapper macros Tobias Waldekranz
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-23 11:44 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, razor, bridge, netdev, jiri, ivecera, rostedt, mhiramat,
	linux-trace-kernel

Add a basic set of tracepoints to the switchdev layer that allows us
to monitor all messages being passed between a bridge and the devices
attached to it.

Deferred operations are additionally traced at the time they are
enqueued. This is useful in situations where we want to inspect the
conditions that lead to that message being generated, by looking at a
stacktrace for example.

Start off (1-2/4) by creating stringifiers for common switchdev
objects. This will primarily be used by the tracepoints for decoding
switchdev notifications, but drivers could also make use of them to
provide richer debug/error messages.

Then (3/4) create a common function through which all replay calls
pass, to create a natural point of instrumentation, before adding the
tracepoints themselves (4/4).

v2 -> v3:

Take a more conservative approach to the refactoring of
switchdev.c. In the end, I don't know that my previous attempt really
improved the situation much.

v1 -> v2:

- Fixup kernel-doc comment for switchdev_call_replay

Tobias Waldekranz (4):
  net: switchdev: Wrap enums in mapper macros
  net: switchdev: Add helpers to display switchdev objects as strings
  net: switchdev: Relay all replay messages through a central function
  net: switchdev: Add tracepoints

 include/net/switchdev.h          | 130 ++++++++++-----
 include/trace/events/switchdev.h |  74 ++++++++
 net/bridge/br_switchdev.c        |  10 +-
 net/switchdev/Makefile           |   2 +-
 net/switchdev/switchdev-str.c    | 278 +++++++++++++++++++++++++++++++
 net/switchdev/switchdev.c        |  87 +++++++++-
 6 files changed, 521 insertions(+), 60 deletions(-)
 create mode 100644 include/trace/events/switchdev.h
 create mode 100644 net/switchdev/switchdev-str.c

-- 
2.34.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3 net-next 1/4] net: switchdev: Wrap enums in mapper macros
  2024-02-23 11:44 [PATCH v3 net-next 0/4] net: switchdev: Tracepoints Tobias Waldekranz
@ 2024-02-23 11:44 ` Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 2/4] net: switchdev: Add helpers to display switchdev objects as strings Tobias Waldekranz
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-23 11:44 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, razor, bridge, netdev, jiri, ivecera, rostedt, mhiramat,
	linux-trace-kernel

In order to avoid having to maintain separate mappings from enum
values to strings, wrap enumerators in mapper macros, so that
enum-to-string tables can be generated.

This will make it easier to create helpers that convert switchdev
objects to strings.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 include/net/switchdev.h | 113 ++++++++++++++++++++++++----------------
 1 file changed, 68 insertions(+), 45 deletions(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8346b0d29542..86298a21c6c8 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,21 +16,28 @@
 #define SWITCHDEV_F_SKIP_EOPNOTSUPP	BIT(1)
 #define SWITCHDEV_F_DEFER		BIT(2)
 
+#define SWITCHDEV_ATTR_ID_MAPPER(_fn)	\
+	_fn(UNDEFINED),			\
+	_fn(PORT_STP_STATE),		\
+	_fn(PORT_MST_STATE),		\
+	_fn(PORT_BRIDGE_FLAGS),		\
+	_fn(PORT_PRE_BRIDGE_FLAGS),	\
+	_fn(PORT_MROUTER),		\
+	_fn(BRIDGE_AGEING_TIME),	\
+	_fn(BRIDGE_VLAN_FILTERING),	\
+	_fn(BRIDGE_VLAN_PROTOCOL),	\
+	_fn(BRIDGE_MC_DISABLED),	\
+	_fn(BRIDGE_MROUTER),		\
+	_fn(BRIDGE_MST),		\
+	_fn(MRP_PORT_ROLE),		\
+	_fn(VLAN_MSTI),			\
+	/*  */
+
+#define SWITCHDEV_ATTR_ID_ENUMERATOR(_id) \
+	SWITCHDEV_ATTR_ID_ ## _id
+
 enum switchdev_attr_id {
-	SWITCHDEV_ATTR_ID_UNDEFINED,
-	SWITCHDEV_ATTR_ID_PORT_STP_STATE,
-	SWITCHDEV_ATTR_ID_PORT_MST_STATE,
-	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
-	SWITCHDEV_ATTR_ID_PORT_MROUTER,
-	SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
-	SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
-	SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL,
-	SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
-	SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
-	SWITCHDEV_ATTR_ID_BRIDGE_MST,
-	SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
-	SWITCHDEV_ATTR_ID_VLAN_MSTI,
+	SWITCHDEV_ATTR_ID_MAPPER(SWITCHDEV_ATTR_ID_ENUMERATOR)
 };
 
 struct switchdev_mst_state {
@@ -69,18 +76,25 @@ struct switchdev_attr {
 	} u;
 };
 
+#define SWITCHDEV_OBJ_ID_MAPPER(_fn)	\
+	_fn(UNDEFINED),			\
+	_fn(PORT_VLAN),			\
+	_fn(PORT_MDB),			\
+	_fn(HOST_MDB),			\
+	_fn(MRP),			\
+	_fn(RING_TEST_MRP),		\
+	_fn(RING_ROLE_MRP),		\
+	_fn(RING_STATE_MRP),		\
+	_fn(IN_TEST_MRP),		\
+	_fn(IN_ROLE_MRP),		\
+	_fn(IN_STATE_MRP),		\
+	/*  */
+
+#define SWITCHDEV_OBJ_ID_ENUMERATOR(_id) \
+	SWITCHDEV_OBJ_ID_ ## _id
+
 enum switchdev_obj_id {
-	SWITCHDEV_OBJ_ID_UNDEFINED,
-	SWITCHDEV_OBJ_ID_PORT_VLAN,
-	SWITCHDEV_OBJ_ID_PORT_MDB,
-	SWITCHDEV_OBJ_ID_HOST_MDB,
-	SWITCHDEV_OBJ_ID_MRP,
-	SWITCHDEV_OBJ_ID_RING_TEST_MRP,
-	SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
-	SWITCHDEV_OBJ_ID_RING_STATE_MRP,
-	SWITCHDEV_OBJ_ID_IN_TEST_MRP,
-	SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
-	SWITCHDEV_OBJ_ID_IN_STATE_MRP,
+	SWITCHDEV_OBJ_ID_MAPPER(SWITCHDEV_OBJ_ID_ENUMERATOR)
 };
 
 struct switchdev_obj {
@@ -209,27 +223,36 @@ struct switchdev_brport {
 	bool tx_fwd_offload;
 };
 
+#define SWITCHDEV_TYPE_MAPPER(_fn)	\
+	_fn(UNKNOWN),			\
+					\
+	_fn(FDB_ADD_TO_BRIDGE),		\
+	_fn(FDB_DEL_TO_BRIDGE),		\
+	_fn(FDB_ADD_TO_DEVICE),		\
+	_fn(FDB_DEL_TO_DEVICE),		\
+	_fn(FDB_OFFLOADED),		\
+	_fn(FDB_FLUSH_TO_BRIDGE),	\
+					\
+	_fn(PORT_OBJ_ADD),		\
+	_fn(PORT_OBJ_DEL),		\
+	_fn(PORT_ATTR_SET),		\
+					\
+	_fn(VXLAN_FDB_ADD_TO_BRIDGE),	\
+	_fn(VXLAN_FDB_DEL_TO_BRIDGE),	\
+	_fn(VXLAN_FDB_ADD_TO_DEVICE),	\
+	_fn(VXLAN_FDB_DEL_TO_DEVICE),	\
+	_fn(VXLAN_FDB_OFFLOADED),	\
+					\
+	_fn(BRPORT_OFFLOADED),		\
+	_fn(BRPORT_UNOFFLOADED),	\
+	_fn(BRPORT_REPLAY),		\
+	/*  */
+
+#define SWITCHDEV_TYPE_ENUMERATOR(_id) \
+	SWITCHDEV_ ## _id
+
 enum switchdev_notifier_type {
-	SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
-	SWITCHDEV_FDB_DEL_TO_BRIDGE,
-	SWITCHDEV_FDB_ADD_TO_DEVICE,
-	SWITCHDEV_FDB_DEL_TO_DEVICE,
-	SWITCHDEV_FDB_OFFLOADED,
-	SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
-
-	SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */
-	SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */
-	SWITCHDEV_PORT_ATTR_SET, /* May be blocking . */
-
-	SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE,
-	SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE,
-	SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
-	SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
-	SWITCHDEV_VXLAN_FDB_OFFLOADED,
-
-	SWITCHDEV_BRPORT_OFFLOADED,
-	SWITCHDEV_BRPORT_UNOFFLOADED,
-	SWITCHDEV_BRPORT_REPLAY,
+	SWITCHDEV_TYPE_MAPPER(SWITCHDEV_TYPE_ENUMERATOR)
 };
 
 struct switchdev_notifier_info {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 net-next 2/4] net: switchdev: Add helpers to display switchdev objects as strings
  2024-02-23 11:44 [PATCH v3 net-next 0/4] net: switchdev: Tracepoints Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 1/4] net: switchdev: Wrap enums in mapper macros Tobias Waldekranz
@ 2024-02-23 11:44 ` Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 3/4] net: switchdev: Relay all replay messages through a central function Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints Tobias Waldekranz
  3 siblings, 0 replies; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-23 11:44 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, razor, bridge, netdev, jiri, ivecera, rostedt, mhiramat,
	linux-trace-kernel

Useful both in error messages and in tracepoints.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 include/net/switchdev.h       |  14 ++
 net/switchdev/Makefile        |   2 +-
 net/switchdev/switchdev-str.c | 278 ++++++++++++++++++++++++++++++++++
 3 files changed, 293 insertions(+), 1 deletion(-)
 create mode 100644 net/switchdev/switchdev-str.c

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 86298a21c6c8..9bf387664e71 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -394,6 +394,20 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
 			int (*set_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack));
+
+/* switchdev-str.c */
+ssize_t switchdev_attr_str(const struct switchdev_attr *attr,
+			   char *buf, size_t len);
+ssize_t switchdev_obj_str(const struct switchdev_obj *obj,
+			  char *buf, size_t len);
+ssize_t switchdev_fdb_info_str(enum switchdev_notifier_type nt,
+			       const struct switchdev_notifier_fdb_info *fdbi,
+			       char *buf, size_t len);
+ssize_t switchdev_brport_str(const struct switchdev_brport *brport,
+			     char *buf, size_t len);
+ssize_t switchdev_notifier_str(enum switchdev_notifier_type nt,
+			       const struct switchdev_notifier_info *info,
+			       char *buf, size_t len);
 #else
 
 static inline int
diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile
index c5561d7f3a7c..a40e4421087b 100644
--- a/net/switchdev/Makefile
+++ b/net/switchdev/Makefile
@@ -3,4 +3,4 @@
 # Makefile for the Switch device API
 #
 
-obj-y += switchdev.o
+obj-y += switchdev.o switchdev-str.o
diff --git a/net/switchdev/switchdev-str.c b/net/switchdev/switchdev-str.c
new file mode 100644
index 000000000000..a1fa7315cc28
--- /dev/null
+++ b/net/switchdev/switchdev-str.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/if_bridge.h>
+#include <net/switchdev.h>
+
+static ssize_t switchdev_str_write_id(char *buf, size_t len, unsigned long id,
+				      const char *const *names, size_t n_names)
+{
+	if (id < n_names && names[id])
+		return snprintf(buf, len, "%s", names[id]);
+
+	return snprintf(buf, len, "UNKNOWN<%lu>", id);
+}
+
+ssize_t switchdev_attr_str(const struct switchdev_attr *attr,
+			   char *buf, size_t len)
+{
+#define _ATTR_ID_STRINGER(_id) [SWITCHDEV_ATTR_ID_ ## _id] = #_id
+	static const char *const attr_id_strs[] = {
+		SWITCHDEV_ATTR_ID_MAPPER(_ATTR_ID_STRINGER)
+	};
+#undef _ATTR_ID_STRINGER
+
+	static const char *const stp_state_strs[] = {
+		[BR_STATE_DISABLED] = "disabled",
+		[BR_STATE_LISTENING] = "listening",
+		[BR_STATE_LEARNING] = "learning",
+		[BR_STATE_FORWARDING] = "forwarding",
+		[BR_STATE_BLOCKING] = "blocking",
+	};
+
+	char *cur = buf;
+	ssize_t n;
+
+	n = switchdev_str_write_id(cur, len, attr->id, attr_id_strs,
+				   ARRAY_SIZE(attr_id_strs));
+	if (n < 0)
+		return n;
+
+	cur += n;
+	len -= n;
+
+	n = snprintf(cur, len, "(flags %#x orig %s) ", attr->flags,
+		     attr->orig_dev ? netdev_name(attr->orig_dev) : "(null)");
+	if (n < 0)
+		return n;
+
+	cur += n;
+	len -= n;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		n = switchdev_str_write_id(cur, len, attr->u.stp_state,
+					   stp_state_strs, ARRAY_SIZE(stp_state_strs));
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
+		n = snprintf(cur, len, "msti %u", attr->u.mst_state.msti);
+		if (n < 0)
+			return n;
+
+		cur += n;
+		len -= n;
+
+		n = switchdev_str_write_id(cur, len, attr->u.mst_state.state,
+					   stp_state_strs, ARRAY_SIZE(stp_state_strs));
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		n = snprintf(cur, len, "val %#lx mask %#lx",
+			     attr->u.brport_flags.val,
+			     attr->u.brport_flags.mask);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_MROUTER:
+	case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+		n = snprintf(cur, len, "%s",
+			     attr->u.mrouter ? "enabled" : "disabled");
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+		n = snprintf(cur, len, "%ums",
+			     jiffies_to_msecs(clock_t_to_jiffies(attr->u.ageing_time)));
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		n = snprintf(cur, len, "%s",
+			     attr->u.vlan_filtering ? "enabled" : "disabled");
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+		n = snprintf(cur, len, "%#x", attr->u.vlan_protocol);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+		n = snprintf(cur, len, "%s",
+			     attr->u.mc_disabled ? "active" : "inactive");
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_MST:
+		n = snprintf(cur, len, "%s",
+			     attr->u.mst ? "enabled" : "disabled");
+		break;
+	case SWITCHDEV_ATTR_ID_VLAN_MSTI:
+		n = snprintf(cur, len, "vid %u msti %u",
+			     attr->u.vlan_msti.vid, attr->u.vlan_msti.msti);
+		break;
+	default:
+		/* Trim trailing space */
+		return --cur - buf;
+	}
+
+	if (n < 0)
+		return n;
+
+	cur += n;
+	return cur - buf;
+}
+EXPORT_SYMBOL_GPL(switchdev_attr_str);
+
+ssize_t switchdev_obj_str(const struct switchdev_obj *obj,
+			  char *buf, size_t len)
+{
+#define _OBJ_ID_STRINGER(_id) [SWITCHDEV_OBJ_ID_ ## _id] = #_id
+	static const char *const obj_id_strs[] = {
+		SWITCHDEV_OBJ_ID_MAPPER(_OBJ_ID_STRINGER)
+	};
+#undef _OBJ_ID_STRINGER
+
+	const struct switchdev_obj_port_vlan *vlan;
+	const struct switchdev_obj_port_mdb *mdb;
+	char *cur = buf;
+	ssize_t n;
+
+	n = switchdev_str_write_id(cur, len, obj->id, obj_id_strs,
+				   ARRAY_SIZE(obj_id_strs));
+	if (n < 0)
+		return n;
+
+	cur += n;
+	len -= n;
+
+	n = snprintf(cur, len, "(flags %#x orig %s) ", obj->flags,
+		     obj->orig_dev ? netdev_name(obj->orig_dev) : "(null)");
+	if (n < 0)
+		return n;
+
+	cur += n;
+	len -= n;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+		n = snprintf(cur, len, "vid %u flags %#x%s", vlan->vid,
+			     vlan->flags, vlan->changed ? "(changed)" : "");
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+		n = snprintf(cur, len, "vid %u addr %pM", mdb->vid, mdb->addr);
+		break;
+	default:
+		/* Trim trailing space */
+		return --cur - buf;
+	}
+
+	if (n < 0)
+		return n;
+
+	cur += n;
+	return cur - buf;
+}
+EXPORT_SYMBOL_GPL(switchdev_obj_str);
+
+ssize_t switchdev_fdb_info_str(enum switchdev_notifier_type nt,
+			       const struct switchdev_notifier_fdb_info *fdbi,
+			       char *buf, size_t len)
+{
+	switch (nt) {
+	case SWITCHDEV_FDB_FLUSH_TO_BRIDGE:
+		return snprintf(buf, len, "vid %u", fdbi->vid);
+	case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+	case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+	case SWITCHDEV_FDB_OFFLOADED:
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+	case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+		return snprintf(buf, len, "vid %u addr %pM%s%s%s%s",
+				fdbi->vid, fdbi->addr,
+				fdbi->added_by_user ? " added_by_user" : "",
+				fdbi->is_local ? " is_local" : "",
+				fdbi->locked ? " locked" : "",
+				fdbi->offloaded ? " offloaded" : "");
+	default:
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(switchdev_fdb_info_str);
+
+ssize_t switchdev_brport_str(const struct switchdev_brport *brport,
+			     char *buf, size_t len)
+{
+	return snprintf(buf, len, "dev %s%s",
+			brport->dev ? netdev_name(brport->dev) : "(null)",
+			brport->tx_fwd_offload ? " tx_fwd_offload" : "");
+}
+EXPORT_SYMBOL_GPL(switchdev_brport_str);
+
+ssize_t switchdev_notifier_str(enum switchdev_notifier_type nt,
+			       const struct switchdev_notifier_info *info,
+			       char *buf, size_t len)
+{
+#define _TYPE_STRINGER(_id) [SWITCHDEV_ ## _id] = #_id
+	static const char *const type_strs[] = {
+		SWITCHDEV_TYPE_MAPPER(_TYPE_STRINGER)
+	};
+#undef _TYPE_STRINGER
+
+	const struct switchdev_notifier_port_attr_info *attri;
+	const struct switchdev_notifier_brport_info *brporti;
+	const struct switchdev_notifier_port_obj_info *obji;
+	const struct switchdev_notifier_fdb_info *fdbi;
+	char *cur = buf;
+	ssize_t n;
+
+	n = switchdev_str_write_id(cur, len, nt, type_strs,
+				   ARRAY_SIZE(type_strs));
+	if (n < 0)
+		return n;
+
+	cur += n;
+	len -= n;
+
+	if (len > 0) {
+		*cur++ = ' ';
+		len--;
+	}
+
+	switch (nt) {
+	case SWITCHDEV_FDB_FLUSH_TO_BRIDGE:
+	case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+	case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+	case SWITCHDEV_FDB_OFFLOADED:
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
+	case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+	case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+		fdbi = container_of(info, typeof(*fdbi), info);
+		n = switchdev_fdb_info_str(nt, fdbi, cur, len);
+		break;
+	case SWITCHDEV_PORT_OBJ_ADD:
+	case SWITCHDEV_PORT_OBJ_DEL:
+		obji = container_of(info, typeof(*obji), info);
+		n = switchdev_obj_str(obji->obj, cur, len);
+		break;
+	case SWITCHDEV_PORT_ATTR_SET:
+		attri = container_of(info, typeof(*attri), info);
+		n = switchdev_attr_str(attri->attr, cur, len);
+		break;
+	case SWITCHDEV_BRPORT_OFFLOADED:
+	case SWITCHDEV_BRPORT_UNOFFLOADED:
+	case SWITCHDEV_BRPORT_REPLAY:
+		brporti = container_of(info, typeof(*brporti), info);
+		n = switchdev_brport_str(&brporti->brport, cur, len);
+		break;
+	default:
+		/* Trim trailing space */
+		return --cur - buf;
+	}
+
+	if (n < 0)
+		return n;
+
+	cur += n;
+	return cur - buf;
+}
+EXPORT_SYMBOL_GPL(switchdev_notifier_str);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 net-next 3/4] net: switchdev: Relay all replay messages through a central function
  2024-02-23 11:44 [PATCH v3 net-next 0/4] net: switchdev: Tracepoints Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 1/4] net: switchdev: Wrap enums in mapper macros Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 2/4] net: switchdev: Add helpers to display switchdev objects as strings Tobias Waldekranz
@ 2024-02-23 11:44 ` Tobias Waldekranz
  2024-02-23 11:44 ` [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints Tobias Waldekranz
  3 siblings, 0 replies; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-23 11:44 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, razor, bridge, netdev, jiri, ivecera, rostedt, mhiramat,
	linux-trace-kernel

This will make it easier to add a tracepoint for all replay messages
later on.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 include/net/switchdev.h   |  3 +++
 net/bridge/br_switchdev.c | 10 +++++-----
 net/switchdev/switchdev.c | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 9bf387664e71..df4db720183f 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -340,6 +340,9 @@ int switchdev_port_obj_add(struct net_device *dev,
 int switchdev_port_obj_del(struct net_device *dev,
 			   const struct switchdev_obj *obj);
 
+int switchdev_call_replay(struct notifier_block *nb, unsigned long type,
+			  struct switchdev_notifier_info *info);
+
 int register_switchdev_notifier(struct notifier_block *nb);
 int unregister_switchdev_notifier(struct notifier_block *nb);
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7b41ee8740cb..cc9ed7f6bf1c 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -306,7 +306,7 @@ br_switchdev_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
 
 	br_switchdev_fdb_populate(br, &item, fdb, ctx);
 
-	err = nb->notifier_call(nb, action, &item);
+	err = switchdev_call_replay(nb, action, &item.info);
 	return notifier_to_errno(err);
 }
 
@@ -376,8 +376,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
 			attr.u.vlan_msti.vid = v->vid;
 			attr.u.vlan_msti.msti = v->msti;
 
-			err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET,
-						&attr_info);
+			err = switchdev_call_replay(nb, SWITCHDEV_PORT_ATTR_SET,
+						    &attr_info.info);
 			err = notifier_to_errno(err);
 			if (err)
 				return err;
@@ -404,7 +404,7 @@ br_switchdev_vlan_replay_one(struct notifier_block *nb,
 	};
 	int err;
 
-	err = nb->notifier_call(nb, action, &obj_info);
+	err = switchdev_call_replay(nb, action, &obj_info.info);
 	return notifier_to_errno(err);
 }
 
@@ -590,7 +590,7 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
 	};
 	int err;
 
-	err = nb->notifier_call(nb, action, &obj_info);
+	err = switchdev_call_replay(nb, action, &obj_info.info);
 	return notifier_to_errno(err);
 }
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index c9189a970eec..f73249269a87 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -380,6 +380,24 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
 
+/**
+ *	switchdev_call_replay - Replay switchdev message to driver
+ *
+ *	@nb: notifier block to send the message to
+ *	@type: value passed unmodified to notifier function
+ *	@info: notifier information data
+ *
+ *	Called by a bridge, in a response to a replay request
+ *	initiated by a port that is either attaching to, or detaching
+ *	from, that bridge.
+ */
+int switchdev_call_replay(struct notifier_block *nb, unsigned long type,
+			  struct switchdev_notifier_info *info)
+{
+	return nb->notifier_call(nb, type, info);
+}
+EXPORT_SYMBOL_GPL(switchdev_call_replay);
+
 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
 static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-23 11:44 [PATCH v3 net-next 0/4] net: switchdev: Tracepoints Tobias Waldekranz
                   ` (2 preceding siblings ...)
  2024-02-23 11:44 ` [PATCH v3 net-next 3/4] net: switchdev: Relay all replay messages through a central function Tobias Waldekranz
@ 2024-02-23 11:44 ` Tobias Waldekranz
  2024-02-23 15:38   ` Steven Rostedt
  3 siblings, 1 reply; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-23 11:44 UTC (permalink / raw)
  To: davem, kuba
  Cc: roopa, razor, bridge, netdev, jiri, ivecera, rostedt, mhiramat,
	linux-trace-kernel

Add a basic set of tracepoints:

- switchdev_defer: Fires whenever an operation is enqueued to the
  switchdev workqueue for deferred delivery.

- switchdev_call_{atomic,blocking}: Fires whenever a notification is
  sent to the corresponding switchdev notifier chain.

- switchdev_call_replay: Fires whenever a notification is sent to a
  specific driver's notifier block, in response to a replay request.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---
 include/trace/events/switchdev.h | 74 ++++++++++++++++++++++++++++++++
 net/switchdev/switchdev.c        | 71 +++++++++++++++++++++++++-----
 2 files changed, 135 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/switchdev.h

diff --git a/include/trace/events/switchdev.h b/include/trace/events/switchdev.h
new file mode 100644
index 000000000000..dcaf6870d017
--- /dev/null
+++ b/include/trace/events/switchdev.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	switchdev
+
+#if !defined(_TRACE_SWITCHDEV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SWITCHDEV_H
+
+#include <linux/tracepoint.h>
+#include <net/switchdev.h>
+
+#define SWITCHDEV_TRACE_MSG_MAX 128
+
+DECLARE_EVENT_CLASS(switchdev_call,
+	TP_PROTO(unsigned long val,
+		 const struct switchdev_notifier_info *info,
+		 int err),
+
+	TP_ARGS(val, info, err),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, val)
+		__string(dev, info->dev ? netdev_name(info->dev) : "(null)")
+		__field(const struct switchdev_notifier_info *, info)
+		__field(int, err)
+		__array(char, msg, SWITCHDEV_TRACE_MSG_MAX)
+	),
+
+	TP_fast_assign(
+		__entry->val = val;
+		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
+		__entry->info = info;
+		__entry->err = err;
+		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);
+	),
+
+	TP_printk("dev %s %s -> %d", __get_str(dev), __entry->msg, __entry->err)
+);
+
+DEFINE_EVENT(switchdev_call, switchdev_defer,
+	TP_PROTO(unsigned long val,
+		 const struct switchdev_notifier_info *info,
+		 int err),
+
+	TP_ARGS(val, info, err)
+);
+
+DEFINE_EVENT(switchdev_call, switchdev_call_atomic,
+	TP_PROTO(unsigned long val,
+		 const struct switchdev_notifier_info *info,
+		 int err),
+
+	TP_ARGS(val, info, err)
+);
+
+DEFINE_EVENT(switchdev_call, switchdev_call_blocking,
+	TP_PROTO(unsigned long val,
+		 const struct switchdev_notifier_info *info,
+		 int err),
+
+	TP_ARGS(val, info, err)
+);
+
+DEFINE_EVENT(switchdev_call, switchdev_call_replay,
+	TP_PROTO(unsigned long val,
+		 const struct switchdev_notifier_info *info,
+		 int err),
+
+	TP_ARGS(val, info, err)
+);
+
+#endif /* _TRACE_SWITCHDEV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index f73249269a87..74d715166981 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,9 @@
 #include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/switchdev.h>
+
 static bool switchdev_obj_eq(const struct switchdev_obj *a,
 			     const struct switchdev_obj *b)
 {
@@ -180,8 +183,20 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev,
 static int switchdev_port_attr_set_defer(struct net_device *dev,
 					 const struct switchdev_attr *attr)
 {
-	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
-					  switchdev_port_attr_set_deferred);
+	int err;
+
+	err = switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
+					 switchdev_port_attr_set_deferred);
+
+	if (trace_switchdev_defer_enabled()) {
+		struct switchdev_notifier_port_attr_info attr_info = {
+			.info.dev = dev,
+			.attr = attr,
+		};
+
+		trace_switchdev_defer(SWITCHDEV_PORT_ATTR_SET, &attr_info.info, err);
+	}
+	return err;
 }
 
 /**
@@ -263,8 +278,20 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
 static int switchdev_port_obj_add_defer(struct net_device *dev,
 					const struct switchdev_obj *obj)
 {
-	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
-					  switchdev_port_obj_add_deferred);
+	int err;
+
+	err = switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
+					 switchdev_port_obj_add_deferred);
+
+	if (trace_switchdev_defer_enabled()) {
+		struct switchdev_notifier_port_obj_info obj_info = {
+			.info.dev = dev,
+			.obj = obj,
+		};
+
+		trace_switchdev_defer(SWITCHDEV_PORT_OBJ_ADD, &obj_info.info, err);
+	}
+	return err;
 }
 
 /**
@@ -313,8 +340,20 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
 static int switchdev_port_obj_del_defer(struct net_device *dev,
 					const struct switchdev_obj *obj)
 {
-	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
-					  switchdev_port_obj_del_deferred);
+	int err;
+
+	err = switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
+					 switchdev_port_obj_del_deferred);
+
+	if (trace_switchdev_defer_enabled()) {
+		struct switchdev_notifier_port_obj_info obj_info = {
+			.info.dev = dev,
+			.obj = obj,
+		};
+
+		trace_switchdev_defer(SWITCHDEV_PORT_OBJ_DEL, &obj_info.info, err);
+	}
+	return err;
 }
 
 /**
@@ -394,7 +433,11 @@ EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
 int switchdev_call_replay(struct notifier_block *nb, unsigned long type,
 			  struct switchdev_notifier_info *info)
 {
-	return nb->notifier_call(nb, type, info);
+	int ret;
+
+	ret = nb->notifier_call(nb, type, info);
+	trace_switchdev_call_replay(type, info, notifier_to_errno(ret));
+	return ret;
 }
 EXPORT_SYMBOL_GPL(switchdev_call_replay);
 
@@ -437,9 +480,13 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 			     struct switchdev_notifier_info *info,
 			     struct netlink_ext_ack *extack)
 {
+	int ret;
+
 	info->dev = dev;
 	info->extack = extack;
-	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
+	ret = atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
+	trace_switchdev_call_atomic(val, info, notifier_to_errno(ret));
+	return ret;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 
@@ -463,10 +510,14 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
 				      struct switchdev_notifier_info *info,
 				      struct netlink_ext_ack *extack)
 {
+	int ret;
+
 	info->dev = dev;
 	info->extack = extack;
-	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
-					    val, info);
+	ret = blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
+					   val, info);
+	trace_switchdev_call_blocking(val, info, notifier_to_errno(ret));
+	return ret;
 }
 EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-23 11:44 ` [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints Tobias Waldekranz
@ 2024-02-23 15:38   ` Steven Rostedt
  2024-02-26 13:05     ` Tobias Waldekranz
  2024-02-27 10:04     ` Paolo Abeni
  0 siblings, 2 replies; 16+ messages in thread
From: Steven Rostedt @ 2024-02-23 15:38 UTC (permalink / raw)
  To: Tobias Waldekranz
  Cc: davem, kuba, roopa, razor, bridge, netdev, jiri, ivecera,
	mhiramat, linux-trace-kernel

On Fri, 23 Feb 2024 12:44:53 +0100
Tobias Waldekranz <tobias@waldekranz.com> wrote:

> Add a basic set of tracepoints:
> 
> - switchdev_defer: Fires whenever an operation is enqueued to the
>   switchdev workqueue for deferred delivery.
> 
> - switchdev_call_{atomic,blocking}: Fires whenever a notification is
>   sent to the corresponding switchdev notifier chain.
> 
> - switchdev_call_replay: Fires whenever a notification is sent to a
>   specific driver's notifier block, in response to a replay request.
> 
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> ---
>  include/trace/events/switchdev.h | 74 ++++++++++++++++++++++++++++++++
>  net/switchdev/switchdev.c        | 71 +++++++++++++++++++++++++-----
>  2 files changed, 135 insertions(+), 10 deletions(-)
>  create mode 100644 include/trace/events/switchdev.h
> 
> diff --git a/include/trace/events/switchdev.h b/include/trace/events/switchdev.h
> new file mode 100644
> index 000000000000..dcaf6870d017
> --- /dev/null
> +++ b/include/trace/events/switchdev.h
> @@ -0,0 +1,74 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM	switchdev
> +
> +#if !defined(_TRACE_SWITCHDEV_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SWITCHDEV_H
> +
> +#include <linux/tracepoint.h>
> +#include <net/switchdev.h>
> +
> +#define SWITCHDEV_TRACE_MSG_MAX 128

128 bytes is awfully big to waste on the ring buffer. What's the average
size of a string?

> +
> +DECLARE_EVENT_CLASS(switchdev_call,
> +	TP_PROTO(unsigned long val,
> +		 const struct switchdev_notifier_info *info,
> +		 int err),
> +
> +	TP_ARGS(val, info, err),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, val)
> +		__string(dev, info->dev ? netdev_name(info->dev) : "(null)")
> +		__field(const struct switchdev_notifier_info *, info)
> +		__field(int, err)
> +		__array(char, msg, SWITCHDEV_TRACE_MSG_MAX)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->val = val;
> +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
> +		__entry->info = info;
> +		__entry->err = err;
> +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);

Is it possible to just store the information in the trace event and then
call the above function in the read stage? There's helpers to pass strings
around (namely a struct trace_seq *p).

It would require a plugin for libtraceevent if you want to expose it to
user space tools for trace-cmd and perf though.

Another possibility is if this event will not race with other events on he
same CPU, you could create a per-cpu buffer, write into that, and then use
__string() and __assign_str() to save it, as traces happen with preemption
disabled.

-- Steve

> +	),
> +
> +	TP_printk("dev %s %s -> %d", __get_str(dev), __entry->msg, __entry->err)
> +);
> +
> +DEFINE_EVENT(switchdev_call, switchdev_defer,
> +	TP_PROTO(unsigned long val,
> +		 const struct switchdev_notifier_info *info,
> +		 int err),
> +
> +	TP_ARGS(val, info, err)
> +);
> +
> +DEFINE_EVENT(switchdev_call, switchdev_call_atomic,
> +	TP_PROTO(unsigned long val,
> +		 const struct switchdev_notifier_info *info,
> +		 int err),
> +
> +	TP_ARGS(val, info, err)
> +);
> +
> +DEFINE_EVENT(switchdev_call, switchdev_call_blocking,
> +	TP_PROTO(unsigned long val,
> +		 const struct switchdev_notifier_info *info,
> +		 int err),
> +
> +	TP_ARGS(val, info, err)
> +);
> +
> +DEFINE_EVENT(switchdev_call, switchdev_call_replay,
> +	TP_PROTO(unsigned long val,
> +		 const struct switchdev_notifier_info *info,
> +		 int err),
> +
> +	TP_ARGS(val, info, err)
> +);
> +
> +#endif /* _TRACE_SWITCHDEV_H */
> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-23 15:38   ` Steven Rostedt
@ 2024-02-26 13:05     ` Tobias Waldekranz
  2024-02-27 10:04     ` Paolo Abeni
  1 sibling, 0 replies; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-26 13:05 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: davem, kuba, roopa, razor, bridge, netdev, jiri, ivecera,
	mhiramat, linux-trace-kernel

On fre, feb 23, 2024 at 10:38, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Fri, 23 Feb 2024 12:44:53 +0100
> Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
>> Add a basic set of tracepoints:
>> 
>> - switchdev_defer: Fires whenever an operation is enqueued to the
>>   switchdev workqueue for deferred delivery.
>> 
>> - switchdev_call_{atomic,blocking}: Fires whenever a notification is
>>   sent to the corresponding switchdev notifier chain.
>> 
>> - switchdev_call_replay: Fires whenever a notification is sent to a
>>   specific driver's notifier block, in response to a replay request.
>> 
>> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> ---
>>  include/trace/events/switchdev.h | 74 ++++++++++++++++++++++++++++++++
>>  net/switchdev/switchdev.c        | 71 +++++++++++++++++++++++++-----
>>  2 files changed, 135 insertions(+), 10 deletions(-)
>>  create mode 100644 include/trace/events/switchdev.h
>> 
>> diff --git a/include/trace/events/switchdev.h b/include/trace/events/switchdev.h
>> new file mode 100644
>> index 000000000000..dcaf6870d017
>> --- /dev/null
>> +++ b/include/trace/events/switchdev.h
>> @@ -0,0 +1,74 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM	switchdev
>> +
>> +#if !defined(_TRACE_SWITCHDEV_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_SWITCHDEV_H
>> +
>> +#include <linux/tracepoint.h>
>> +#include <net/switchdev.h>
>> +
>> +#define SWITCHDEV_TRACE_MSG_MAX 128
>
> 128 bytes is awfully big to waste on the ring buffer. What's the average
> size of a string?

I would say the typical message is around 60-80 bytes. Some common examples:

    PORT_OBJ_ADD PORT_VLAN(flags 0x0 orig br0) vid 1 flags 0x27
    PORT_OBJ_DEL HOST_MDB(flags 0x0 orig br0) vid 100 addr 33:33:ff:ff:00:09

The worst case I can think of currently is 95 characters:

    VXLAN_FDB_ADD_TO_DEVICE vid 1000 addr de:ad:be:ef:00:01 added_by_user is_local locked offloaded

>> +
>> +DECLARE_EVENT_CLASS(switchdev_call,
>> +	TP_PROTO(unsigned long val,
>> +		 const struct switchdev_notifier_info *info,
>> +		 int err),
>> +
>> +	TP_ARGS(val, info, err),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(unsigned long, val)
>> +		__string(dev, info->dev ? netdev_name(info->dev) : "(null)")
>> +		__field(const struct switchdev_notifier_info *, info)
>> +		__field(int, err)
>> +		__array(char, msg, SWITCHDEV_TRACE_MSG_MAX)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->val = val;
>> +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
>> +		__entry->info = info;
>> +		__entry->err = err;
>> +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);
>
> Is it possible to just store the information in the trace event and then
> call the above function in the read stage? There's helpers to pass strings
> around (namely a struct trace_seq *p).

I'm a complete novice when it comes to tracepoint internals. Am I right
in assuming that TP_printk may execute at a much later time than
TP_fast_assign? E.g., the object referenced by __entry->info may very
well have been freed by that time? That at least seems to be what my
naive refactor to replace __entry->msg with __get_buf() suggests :)

If so, the layout of the switchdev_notifier_* structs makes it a bit
cumbersome to clone the notification in the assign phase, as the size of
a specific notification (e.g., switchdev_notifier_port_obj_info) is not
known by the common notification (switchdev_notifier_info). In the case
of switchdev objects, the problem repeats a second time. E.g., the size
of switchdev_obj_port_vlan is not known by switchdev_obj.

> It would require a plugin for libtraceevent if you want to expose it to
> user space tools for trace-cmd and perf though.
>
> Another possibility is if this event will not race with other events on he
> same CPU, you could create a per-cpu buffer, write into that, and then use
> __string() and __assign_str() to save it, as traces happen with preemption
> disabled.

But bottom halves are still enabled I suppose? Notifications can be
generated both from process context (e.g., users configuring the bridge
with iproute2), and from bridge packet processing (e.g., adding new
neighbors to the FDB). So I don't think that would work in this case.

> -- Steve

Thanks for taking the time!

>> +	),
>> +
>> +	TP_printk("dev %s %s -> %d", __get_str(dev), __entry->msg, __entry->err)
>> +);
>> +
>> +DEFINE_EVENT(switchdev_call, switchdev_defer,
>> +	TP_PROTO(unsigned long val,
>> +		 const struct switchdev_notifier_info *info,
>> +		 int err),
>> +
>> +	TP_ARGS(val, info, err)
>> +);
>> +
>> +DEFINE_EVENT(switchdev_call, switchdev_call_atomic,
>> +	TP_PROTO(unsigned long val,
>> +		 const struct switchdev_notifier_info *info,
>> +		 int err),
>> +
>> +	TP_ARGS(val, info, err)
>> +);
>> +
>> +DEFINE_EVENT(switchdev_call, switchdev_call_blocking,
>> +	TP_PROTO(unsigned long val,
>> +		 const struct switchdev_notifier_info *info,
>> +		 int err),
>> +
>> +	TP_ARGS(val, info, err)
>> +);
>> +
>> +DEFINE_EVENT(switchdev_call, switchdev_call_replay,
>> +	TP_PROTO(unsigned long val,
>> +		 const struct switchdev_notifier_info *info,
>> +		 int err),
>> +
>> +	TP_ARGS(val, info, err)
>> +);
>> +
>> +#endif /* _TRACE_SWITCHDEV_H */
>> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-23 15:38   ` Steven Rostedt
  2024-02-26 13:05     ` Tobias Waldekranz
@ 2024-02-27 10:04     ` Paolo Abeni
  2024-02-28 10:47       ` Tobias Waldekranz
  1 sibling, 1 reply; 16+ messages in thread
From: Paolo Abeni @ 2024-02-27 10:04 UTC (permalink / raw)
  To: Steven Rostedt, Tobias Waldekranz
  Cc: davem, kuba, roopa, razor, bridge, netdev, jiri, ivecera,
	mhiramat, linux-trace-kernel

On Fri, 2024-02-23 at 10:38 -0500, Steven Rostedt wrote:
> On Fri, 23 Feb 2024 12:44:53 +0100
> Tobias Waldekranz <tobias@waldekranz.com> wrote:
> 
> > Add a basic set of tracepoints:
> > 
> > - switchdev_defer: Fires whenever an operation is enqueued to the
> >   switchdev workqueue for deferred delivery.
> > 
> > - switchdev_call_{atomic,blocking}: Fires whenever a notification is
> >   sent to the corresponding switchdev notifier chain.
> > 
> > - switchdev_call_replay: Fires whenever a notification is sent to a
> >   specific driver's notifier block, in response to a replay request.
> > 
> > Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> > ---
> >  include/trace/events/switchdev.h | 74 ++++++++++++++++++++++++++++++++
> >  net/switchdev/switchdev.c        | 71 +++++++++++++++++++++++++-----
> >  2 files changed, 135 insertions(+), 10 deletions(-)
> >  create mode 100644 include/trace/events/switchdev.h
> > 
> > diff --git a/include/trace/events/switchdev.h b/include/trace/events/switchdev.h
> > new file mode 100644
> > index 000000000000..dcaf6870d017
> > --- /dev/null
> > +++ b/include/trace/events/switchdev.h
> > @@ -0,0 +1,74 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM	switchdev
> > +
> > +#if !defined(_TRACE_SWITCHDEV_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_SWITCHDEV_H
> > +
> > +#include <linux/tracepoint.h>
> > +#include <net/switchdev.h>
> > +
> > +#define SWITCHDEV_TRACE_MSG_MAX 128
> 
> 128 bytes is awfully big to waste on the ring buffer. What's the average
> size of a string?
> 
> > +
> > +DECLARE_EVENT_CLASS(switchdev_call,
> > +	TP_PROTO(unsigned long val,
> > +		 const struct switchdev_notifier_info *info,
> > +		 int err),
> > +
> > +	TP_ARGS(val, info, err),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(unsigned long, val)
> > +		__string(dev, info->dev ? netdev_name(info->dev) : "(null)")
> > +		__field(const struct switchdev_notifier_info *, info)
> > +		__field(int, err)
> > +		__array(char, msg, SWITCHDEV_TRACE_MSG_MAX)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__entry->val = val;
> > +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
> > +		__entry->info = info;
> > +		__entry->err = err;
> > +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);
> 
> Is it possible to just store the information in the trace event and then
> call the above function in the read stage?

I agree with Steven: it looks like that with the above code the
tracepoint itself will become measurably costily in terms of CPU
cycles: we want to avoid that.

Perhaps using different tracepoints with different notifier_block type
would help? so that each trace point could just copy a few specific
fields.

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-27 10:04     ` Paolo Abeni
@ 2024-02-28 10:47       ` Tobias Waldekranz
  2024-02-28 14:56         ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Tobias Waldekranz @ 2024-02-28 10:47 UTC (permalink / raw)
  To: Paolo Abeni, Steven Rostedt
  Cc: davem, kuba, roopa, razor, bridge, netdev, jiri, ivecera,
	mhiramat, linux-trace-kernel

On tis, feb 27, 2024 at 11:04, Paolo Abeni <pabeni@redhat.com> wrote:
> On Fri, 2024-02-23 at 10:38 -0500, Steven Rostedt wrote:
>> On Fri, 23 Feb 2024 12:44:53 +0100
>> Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> 
>> > Add a basic set of tracepoints:
>> > 
>> > - switchdev_defer: Fires whenever an operation is enqueued to the
>> >   switchdev workqueue for deferred delivery.
>> > 
>> > - switchdev_call_{atomic,blocking}: Fires whenever a notification is
>> >   sent to the corresponding switchdev notifier chain.
>> > 
>> > - switchdev_call_replay: Fires whenever a notification is sent to a
>> >   specific driver's notifier block, in response to a replay request.
>> > 
>> > Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
>> > ---
>> >  include/trace/events/switchdev.h | 74 ++++++++++++++++++++++++++++++++
>> >  net/switchdev/switchdev.c        | 71 +++++++++++++++++++++++++-----
>> >  2 files changed, 135 insertions(+), 10 deletions(-)
>> >  create mode 100644 include/trace/events/switchdev.h
>> > 
>> > diff --git a/include/trace/events/switchdev.h b/include/trace/events/switchdev.h
>> > new file mode 100644
>> > index 000000000000..dcaf6870d017
>> > --- /dev/null
>> > +++ b/include/trace/events/switchdev.h
>> > @@ -0,0 +1,74 @@
>> > +/* SPDX-License-Identifier: GPL-2.0 */
>> > +#undef TRACE_SYSTEM
>> > +#define TRACE_SYSTEM	switchdev
>> > +
>> > +#if !defined(_TRACE_SWITCHDEV_H) || defined(TRACE_HEADER_MULTI_READ)
>> > +#define _TRACE_SWITCHDEV_H
>> > +
>> > +#include <linux/tracepoint.h>
>> > +#include <net/switchdev.h>
>> > +
>> > +#define SWITCHDEV_TRACE_MSG_MAX 128
>> 
>> 128 bytes is awfully big to waste on the ring buffer. What's the average
>> size of a string?
>> 
>> > +
>> > +DECLARE_EVENT_CLASS(switchdev_call,
>> > +	TP_PROTO(unsigned long val,
>> > +		 const struct switchdev_notifier_info *info,
>> > +		 int err),
>> > +
>> > +	TP_ARGS(val, info, err),
>> > +
>> > +	TP_STRUCT__entry(
>> > +		__field(unsigned long, val)
>> > +		__string(dev, info->dev ? netdev_name(info->dev) : "(null)")
>> > +		__field(const struct switchdev_notifier_info *, info)
>> > +		__field(int, err)
>> > +		__array(char, msg, SWITCHDEV_TRACE_MSG_MAX)
>> > +	),
>> > +
>> > +	TP_fast_assign(
>> > +		__entry->val = val;
>> > +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
>> > +		__entry->info = info;
>> > +		__entry->err = err;
>> > +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);
>> 
>> Is it possible to just store the information in the trace event and then
>> call the above function in the read stage?
>
> I agree with Steven: it looks like that with the above code the
> tracepoint itself will become measurably costily in terms of CPU
> cycles: we want to avoid that.
>
> Perhaps using different tracepoints with different notifier_block type
> would help? so that each trace point could just copy a few specific
> fields.

This can be done, but you will end up having to duplicate the decoding
and formatting logic from switchdev-str.c, with the additional hurdle of
having to figure out the sizes of all referenced objects in order to
create flattened versions of every notification type.

What I like about the current approach is that when new notification and
object types are added, switchdev_notifier_str will automatically be
able to decode them and give you some rough idea of what is going on,
even if no new message specific decoding logic is added. It is also
reusable by drivers that might want to decode notifications or objects
in error messages.

Would some variant of (how I understand) Steven's suggestion to instead
store the formatted message in a dynamic array (__assign_str()), rather
than in the tracepoint entry, be acceptable?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-28 10:47       ` Tobias Waldekranz
@ 2024-02-28 14:56         ` Steven Rostedt
  2024-03-04 22:40           ` Tobias Waldekranz
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2024-02-28 14:56 UTC (permalink / raw)
  To: Tobias Waldekranz
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On Wed, 28 Feb 2024 11:47:24 +0100
Tobias Waldekranz <tobias@waldekranz.com> wrote:

> >> > +	TP_fast_assign(
> >> > +		__entry->val = val;
> >> > +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
> >> > +		__entry->info = info;
> >> > +		__entry->err = err;
> >> > +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);  
> >> 
> >> Is it possible to just store the information in the trace event and then
> >> call the above function in the read stage?  
> >
> > I agree with Steven: it looks like that with the above code the
> > tracepoint itself will become measurably costily in terms of CPU
> > cycles: we want to avoid that.
> >
> > Perhaps using different tracepoints with different notifier_block type
> > would help? so that each trace point could just copy a few specific
> > fields.  
> 
> This can be done, but you will end up having to duplicate the decoding
> and formatting logic from switchdev-str.c, with the additional hurdle of
> having to figure out the sizes of all referenced objects in order to
> create flattened versions of every notification type.

Would it help if you could pass a trace_seq to it? The TP_printk() has a
"magical" trace_seq variable that trace events can use in the TP_printk()
called "p".

Look at:

  include/trace/events/libata.h:

const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
#define __parse_status(s) libata_trace_parse_status(p, s)

Where we have:

const char *
libata_trace_parse_status(struct trace_seq *p, unsigned char status)
{
	const char *ret = trace_seq_buffer_ptr(p);

	trace_seq_printf(p, "{ ");
	if (status & ATA_BUSY)
		trace_seq_printf(p, "BUSY ");
	if (status & ATA_DRDY)
		trace_seq_printf(p, "DRDY ");
	if (status & ATA_DF)
		trace_seq_printf(p, "DF ");
	if (status & ATA_DSC)
		trace_seq_printf(p, "DSC ");
	if (status & ATA_DRQ)
		trace_seq_printf(p, "DRQ ");
	if (status & ATA_CORR)
		trace_seq_printf(p, "CORR ");
	if (status & ATA_SENSE)
		trace_seq_printf(p, "SENSE ");
	if (status & ATA_ERR)
		trace_seq_printf(p, "ERR ");
	trace_seq_putc(p, '}');
	trace_seq_putc(p, 0);

	return ret;
}

The "trace_seq p" is a pointer to trace_seq descriptor that can build
strings, and then you can use it to print a custom string in the trace
output.



> 
> What I like about the current approach is that when new notification and
> object types are added, switchdev_notifier_str will automatically be
> able to decode them and give you some rough idea of what is going on,
> even if no new message specific decoding logic is added. It is also
> reusable by drivers that might want to decode notifications or objects
> in error messages.
> 
> Would some variant of (how I understand) Steven's suggestion to instead
> store the formatted message in a dynamic array (__assign_str()), rather
> than in the tracepoint entry, be acceptable?

Matters if you could adapt using a trace_seq for the output. Or at least
use a seq_buf, as that's what is under the covers of trace_seq. If you
rather just use seq_buf, the above could pretty much be the same by passing
in: &p->seq.

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-02-28 14:56         ` Steven Rostedt
@ 2024-03-04 22:40           ` Tobias Waldekranz
  2024-03-06 15:15             ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Tobias Waldekranz @ 2024-03-04 22:40 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On ons, feb 28, 2024 at 09:56, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 28 Feb 2024 11:47:24 +0100
> Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
>> >> > +	TP_fast_assign(
>> >> > +		__entry->val = val;
>> >> > +		__assign_str(dev, info->dev ? netdev_name(info->dev) : "(null)");
>> >> > +		__entry->info = info;
>> >> > +		__entry->err = err;
>> >> > +		switchdev_notifier_str(val, info, __entry->msg, SWITCHDEV_TRACE_MSG_MAX);  
>> >> 
>> >> Is it possible to just store the information in the trace event and then
>> >> call the above function in the read stage?  
>> >
>> > I agree with Steven: it looks like that with the above code the
>> > tracepoint itself will become measurably costily in terms of CPU
>> > cycles: we want to avoid that.
>> >
>> > Perhaps using different tracepoints with different notifier_block type
>> > would help? so that each trace point could just copy a few specific
>> > fields.  
>> 
>> This can be done, but you will end up having to duplicate the decoding
>> and formatting logic from switchdev-str.c, with the additional hurdle of
>> having to figure out the sizes of all referenced objects in order to
>> create flattened versions of every notification type.
>
> Would it help if you could pass a trace_seq to it? The TP_printk() has a
> "magical" trace_seq variable that trace events can use in the TP_printk()
> called "p".
>
> Look at:
>
>   include/trace/events/libata.h:
>
> const char *libata_trace_parse_status(struct trace_seq*, unsigned char);
> #define __parse_status(s) libata_trace_parse_status(p, s)
>
> Where we have:
>
> const char *
> libata_trace_parse_status(struct trace_seq *p, unsigned char status)
> {
> 	const char *ret = trace_seq_buffer_ptr(p);
>
> 	trace_seq_printf(p, "{ ");
> 	if (status & ATA_BUSY)
> 		trace_seq_printf(p, "BUSY ");
> 	if (status & ATA_DRDY)
> 		trace_seq_printf(p, "DRDY ");
> 	if (status & ATA_DF)
> 		trace_seq_printf(p, "DF ");
> 	if (status & ATA_DSC)
> 		trace_seq_printf(p, "DSC ");
> 	if (status & ATA_DRQ)
> 		trace_seq_printf(p, "DRQ ");
> 	if (status & ATA_CORR)
> 		trace_seq_printf(p, "CORR ");
> 	if (status & ATA_SENSE)
> 		trace_seq_printf(p, "SENSE ");
> 	if (status & ATA_ERR)
> 		trace_seq_printf(p, "ERR ");
> 	trace_seq_putc(p, '}');
> 	trace_seq_putc(p, 0);
>
> 	return ret;
> }
>
> The "trace_seq p" is a pointer to trace_seq descriptor that can build
> strings, and then you can use it to print a custom string in the trace
> output.

Yes I managed to decode the hidden variable :) I also found
trace_seq_acquire() (and its macro alter ego __get_buf()), which would
let me keep the generic stringer functions. So far, so good.

I think the foundational problem remains though: TP_printk() is not
executed until a user reads from the trace_pipe; at which point the
object referenced by __entry->info may already be dead and
buried. Right?

>> 
>> What I like about the current approach is that when new notification and
>> object types are added, switchdev_notifier_str will automatically be
>> able to decode them and give you some rough idea of what is going on,
>> even if no new message specific decoding logic is added. It is also
>> reusable by drivers that might want to decode notifications or objects
>> in error messages.
>> 
>> Would some variant of (how I understand) Steven's suggestion to instead
>> store the formatted message in a dynamic array (__assign_str()), rather
>> than in the tracepoint entry, be acceptable?
>
> Matters if you could adapt using a trace_seq for the output. Or at least
> use a seq_buf, as that's what is under the covers of trace_seq. If you
> rather just use seq_buf, the above could pretty much be the same by passing
> in: &p->seq.
>
> -- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-03-04 22:40           ` Tobias Waldekranz
@ 2024-03-06 15:15             ` Steven Rostedt
  2024-03-06 20:02               ` Tobias Waldekranz
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2024-03-06 15:15 UTC (permalink / raw)
  To: Tobias Waldekranz
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On Mon, 04 Mar 2024 23:40:49 +0100
Tobias Waldekranz <tobias@waldekranz.com> wrote:

> On ons, feb 28, 2024 at 09:56, Steven Rostedt <rostedt@goodmis.org> wrote:
> > On Wed, 28 Feb 2024 11:47:24 +0100
> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >  
> > The "trace_seq p" is a pointer to trace_seq descriptor that can build
> > strings, and then you can use it to print a custom string in the trace
> > output.  
> 
> Yes I managed to decode the hidden variable :) I also found
> trace_seq_acquire() (and its macro alter ego __get_buf()), which would
> let me keep the generic stringer functions. So far, so good.
> 
> I think the foundational problem remains though: TP_printk() is not
> executed until a user reads from the trace_pipe; at which point the
> object referenced by __entry->info may already be dead and
> buried. Right?

Correct. You would need to load all the information into the event data
itself, at the time of the event is triggered, that is needed to determine
how to display it.

-- Steve


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-03-06 15:15             ` Steven Rostedt
@ 2024-03-06 20:02               ` Tobias Waldekranz
  2024-03-06 21:46                 ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Tobias Waldekranz @ 2024-03-06 20:02 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On ons, mar 06, 2024 at 10:15, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Mon, 04 Mar 2024 23:40:49 +0100
> Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
>> On ons, feb 28, 2024 at 09:56, Steven Rostedt <rostedt@goodmis.org> wrote:
>> > On Wed, 28 Feb 2024 11:47:24 +0100
>> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >  
>> > The "trace_seq p" is a pointer to trace_seq descriptor that can build
>> > strings, and then you can use it to print a custom string in the trace
>> > output.  
>> 
>> Yes I managed to decode the hidden variable :) I also found
>> trace_seq_acquire() (and its macro alter ego __get_buf()), which would
>> let me keep the generic stringer functions. So far, so good.
>> 
>> I think the foundational problem remains though: TP_printk() is not
>> executed until a user reads from the trace_pipe; at which point the
>> object referenced by __entry->info may already be dead and
>> buried. Right?
>
> Correct. You would need to load all the information into the event data
> itself, at the time of the event is triggered, that is needed to determine
> how to display it.

Given that that is quite gnarly to do for the events I'm trying to
trace, because of the complex object graph, would it be acceptable to
format the message in the assign phase and store it as dynamic data?
I.e., what (I think) you suggested at the end of your first response.

My thinking is:

- Managing a duplicate (flattened) object graph, exclusively for use by
  these tracepoints, increases the effort to keep the tracing in sync
  with new additions to switchdev; which I think will result in
  developers simply avoiding it altogether. In other words: I'd rather
  have somewhat inefficient but simple flashlight, rather than a very
  efficient one that no one knows how to change the batteries in.

- This is typically not a very hot path. Most events are triggered by
  user configuration. Otherwise when new neighbors are discovered.

- __entry->info is still there for use by raw tracepoint consumers from
  userspace.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-03-06 20:02               ` Tobias Waldekranz
@ 2024-03-06 21:46                 ` Steven Rostedt
  2024-03-06 22:45                   ` Tobias Waldekranz
  0 siblings, 1 reply; 16+ messages in thread
From: Steven Rostedt @ 2024-03-06 21:46 UTC (permalink / raw)
  To: Tobias Waldekranz
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On Wed, 06 Mar 2024 21:02:06 +0100
Tobias Waldekranz <tobias@waldekranz.com> wrote:

> On ons, mar 06, 2024 at 10:15, Steven Rostedt <rostedt@goodmis.org> wrote:
> > On Mon, 04 Mar 2024 23:40:49 +0100
> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >  
> >> On ons, feb 28, 2024 at 09:56, Steven Rostedt <rostedt@goodmis.org> wrote:  
> >> > On Wed, 28 Feb 2024 11:47:24 +0100
> >> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
> >> >  
> >> > The "trace_seq p" is a pointer to trace_seq descriptor that can build
> >> > strings, and then you can use it to print a custom string in the trace
> >> > output.    
> >> 
> >> Yes I managed to decode the hidden variable :) I also found
> >> trace_seq_acquire() (and its macro alter ego __get_buf()), which would
> >> let me keep the generic stringer functions. So far, so good.
> >> 
> >> I think the foundational problem remains though: TP_printk() is not
> >> executed until a user reads from the trace_pipe; at which point the
> >> object referenced by __entry->info may already be dead and
> >> buried. Right?  
> >
> > Correct. You would need to load all the information into the event data
> > itself, at the time of the event is triggered, that is needed to determine
> > how to display it.  
> 
> Given that that is quite gnarly to do for the events I'm trying to
> trace, because of the complex object graph, would it be acceptable to
> format the message in the assign phase and store it as dynamic data?
> I.e., what (I think) you suggested at the end of your first response.

It's really up to what you want to do ;-)

> 
> My thinking is:
> 
> - Managing a duplicate (flattened) object graph, exclusively for use by
>   these tracepoints, increases the effort to keep the tracing in sync
>   with new additions to switchdev; which I think will result in
>   developers simply avoiding it altogether. In other words: I'd rather
>   have somewhat inefficient but simple flashlight, rather than a very
>   efficient one that no one knows how to change the batteries in.
> 
> - This is typically not a very hot path. Most events are triggered by
>   user configuration. Otherwise when new neighbors are discovered.
> 
> - __entry->info is still there for use by raw tracepoint consumers from
>   userspace.

How big is this info?

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-03-06 21:46                 ` Steven Rostedt
@ 2024-03-06 22:45                   ` Tobias Waldekranz
  2024-03-06 23:33                     ` Steven Rostedt
  0 siblings, 1 reply; 16+ messages in thread
From: Tobias Waldekranz @ 2024-03-06 22:45 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On ons, mar 06, 2024 at 16:46, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 06 Mar 2024 21:02:06 +0100
> Tobias Waldekranz <tobias@waldekranz.com> wrote:
>
>> On ons, mar 06, 2024 at 10:15, Steven Rostedt <rostedt@goodmis.org> wrote:
>> > On Mon, 04 Mar 2024 23:40:49 +0100
>> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >  
>> >> On ons, feb 28, 2024 at 09:56, Steven Rostedt <rostedt@goodmis.org> wrote:  
>> >> > On Wed, 28 Feb 2024 11:47:24 +0100
>> >> > Tobias Waldekranz <tobias@waldekranz.com> wrote:
>> >> >  
>> >> > The "trace_seq p" is a pointer to trace_seq descriptor that can build
>> >> > strings, and then you can use it to print a custom string in the trace
>> >> > output.    
>> >> 
>> >> Yes I managed to decode the hidden variable :) I also found
>> >> trace_seq_acquire() (and its macro alter ego __get_buf()), which would
>> >> let me keep the generic stringer functions. So far, so good.
>> >> 
>> >> I think the foundational problem remains though: TP_printk() is not
>> >> executed until a user reads from the trace_pipe; at which point the
>> >> object referenced by __entry->info may already be dead and
>> >> buried. Right?  
>> >
>> > Correct. You would need to load all the information into the event data
>> > itself, at the time of the event is triggered, that is needed to determine
>> > how to display it.  
>> 
>> Given that that is quite gnarly to do for the events I'm trying to
>> trace, because of the complex object graph, would it be acceptable to
>> format the message in the assign phase and store it as dynamic data?
>> I.e., what (I think) you suggested at the end of your first response.
>
> It's really up to what you want to do ;-)

Alright. I'll interpret that as "there's a >0% chance that I'll give you
an Acked-by on something like that" :)

>> 
>> My thinking is:
>> 
>> - Managing a duplicate (flattened) object graph, exclusively for use by
>>   these tracepoints, increases the effort to keep the tracing in sync
>>   with new additions to switchdev; which I think will result in
>>   developers simply avoiding it altogether. In other words: I'd rather
>>   have somewhat inefficient but simple flashlight, rather than a very
>>   efficient one that no one knows how to change the batteries in.
>> 
>> - This is typically not a very hot path. Most events are triggered by
>>   user configuration. Otherwise when new neighbors are discovered.
>> 
>> - __entry->info is still there for use by raw tracepoint consumers from
>>   userspace.
>
> How big is this info?

The common struct (switchdev_notifier_info) is 24B at the
moment. Depending on __entry->val, the size of the enclosing
notification (e.g. switchdev_notifier_port_obj_info) is between
40-64B. This pattern may then repeat again inside the concrete notifier,
where you have a pointer to a common object (e.g. switchdev_obj, 48B)
whose outer size (e.g. switchdev_obj_port_vlan, 56B) is determined by an
accompanying enum.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints
  2024-03-06 22:45                   ` Tobias Waldekranz
@ 2024-03-06 23:33                     ` Steven Rostedt
  0 siblings, 0 replies; 16+ messages in thread
From: Steven Rostedt @ 2024-03-06 23:33 UTC (permalink / raw)
  To: Tobias Waldekranz
  Cc: Paolo Abeni, davem, kuba, roopa, razor, bridge, netdev, jiri,
	ivecera, mhiramat, linux-trace-kernel

On Wed, 06 Mar 2024 23:45:57 +0100
Tobias Waldekranz <tobias@waldekranz.com> wrote:

> > How big is this info?  
> 
> The common struct (switchdev_notifier_info) is 24B at the
> moment. Depending on __entry->val, the size of the enclosing
> notification (e.g. switchdev_notifier_port_obj_info) is between
> 40-64B. This pattern may then repeat again inside the concrete notifier,
> where you have a pointer to a common object (e.g. switchdev_obj, 48B)
> whose outer size (e.g. switchdev_obj_port_vlan, 56B) is determined by an
> accompanying enum.

As long as it's under 1K, you should be good.

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2024-03-06 23:31 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-23 11:44 [PATCH v3 net-next 0/4] net: switchdev: Tracepoints Tobias Waldekranz
2024-02-23 11:44 ` [PATCH v3 net-next 1/4] net: switchdev: Wrap enums in mapper macros Tobias Waldekranz
2024-02-23 11:44 ` [PATCH v3 net-next 2/4] net: switchdev: Add helpers to display switchdev objects as strings Tobias Waldekranz
2024-02-23 11:44 ` [PATCH v3 net-next 3/4] net: switchdev: Relay all replay messages through a central function Tobias Waldekranz
2024-02-23 11:44 ` [PATCH v3 net-next 4/4] net: switchdev: Add tracepoints Tobias Waldekranz
2024-02-23 15:38   ` Steven Rostedt
2024-02-26 13:05     ` Tobias Waldekranz
2024-02-27 10:04     ` Paolo Abeni
2024-02-28 10:47       ` Tobias Waldekranz
2024-02-28 14:56         ` Steven Rostedt
2024-03-04 22:40           ` Tobias Waldekranz
2024-03-06 15:15             ` Steven Rostedt
2024-03-06 20:02               ` Tobias Waldekranz
2024-03-06 21:46                 ` Steven Rostedt
2024-03-06 22:45                   ` Tobias Waldekranz
2024-03-06 23:33                     ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).