All of lore.kernel.org
 help / color / mirror / Atom feed
* [bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints
@ 2018-04-18 15:30 Sebastiano Miano
  2018-04-18 15:30 ` [bpf-next PATCH 1/3] bpf: add id to map tracepoint Sebastiano Miano
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Sebastiano Miano @ 2018-04-18 15:30 UTC (permalink / raw)
  To: netdev, ast, daniel; +Cc: mingo, rostedt, brouer, fulvio.risso

The following series:
1) Add ID to both map and prog related tracepoints
2) Add a sample program that shows how to monitor and 
   filter map related events using their IDs.

---

Sebastiano Miano (3):
      bpf: add id to map tracepoint
      bpf: add id to prog tracepoint
      bpf: add sample program to trace map events


 include/trace/events/bpf.h          |   44 ++++-
 samples/bpf/Makefile                |    4 
 samples/bpf/trace_map_events_kern.c |  217 ++++++++++++++++++++++++
 samples/bpf/trace_map_events_user.c |  314 +++++++++++++++++++++++++++++++++++
 4 files changed, 569 insertions(+), 10 deletions(-)
 create mode 100644 samples/bpf/trace_map_events_kern.c
 create mode 100644 samples/bpf/trace_map_events_user.c

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [bpf-next PATCH 1/3] bpf: add id to map tracepoint
  2018-04-18 15:30 [bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints Sebastiano Miano
@ 2018-04-18 15:30 ` Sebastiano Miano
  2018-04-19  9:08   ` Jesper Dangaard Brouer
  2018-04-18 15:30 ` [bpf-next PATCH 2/3] bpf: add id to prog tracepoint Sebastiano Miano
  2018-04-18 15:30 ` [bpf-next PATCH 3/3] bpf: add sample program to trace map events Sebastiano Miano
  2 siblings, 1 reply; 12+ messages in thread
From: Sebastiano Miano @ 2018-04-18 15:30 UTC (permalink / raw)
  To: netdev, ast, daniel; +Cc: mingo, rostedt, brouer, fulvio.risso

This patch adds the map id to the bpf tracepoints
that can be used when monitoring or inspecting map
related functions.

Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>
Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/trace/events/bpf.h |   29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index 1501856..d7c9726 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -130,6 +130,7 @@ TRACE_EVENT(bpf_map_create,
 		__field(u32, max_entries)
 		__field(u32, flags)
 		__field(int, ufd)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -139,9 +140,11 @@ TRACE_EVENT(bpf_map_create,
 		__entry->max_entries = map->max_entries;
 		__entry->flags       = map->map_flags;
 		__entry->ufd         = ufd;
+		__entry->id          = map->id;
 	),
 
-	TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x",
+	TP_printk("id=%u type=%s ufd=%d key=%u val=%u max=%u flags=%x",
+		  __entry->id,
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd, __entry->size_key, __entry->size_value,
 		  __entry->max_entries, __entry->flags)
@@ -199,17 +202,20 @@ DECLARE_EVENT_CLASS(bpf_obj_map,
 		__field(u32, type)
 		__field(int, ufd)
 		__string(path, pname->name)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
 		__assign_str(path, pname->name);
 		__entry->type = map->map_type;
 		__entry->ufd  = ufd;
+		__entry->id   = map->id;
 	),
 
-	TP_printk("map type=%s ufd=%d path=%s",
-		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
-		  __entry->ufd, __get_str(path))
+	TP_printk("map id=%u type=%s ufd=%d path=%s",
+		__entry->id,
+		__print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
+		__entry->ufd, __get_str(path))
 );
 
 DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map,
@@ -244,6 +250,7 @@ DECLARE_EVENT_CLASS(bpf_map_keyval,
 		__dynamic_array(u8, val, map->value_size)
 		__field(bool, val_trunc)
 		__field(int, ufd)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -255,9 +262,11 @@ DECLARE_EVENT_CLASS(bpf_map_keyval,
 		__entry->val_len   = min(map->value_size, 16U);
 		__entry->val_trunc = map->value_size != __entry->val_len;
 		__entry->ufd       = ufd;
+		__entry->id        = map->id;
 	),
 
-	TP_printk("map type=%s ufd=%d key=[%s%s] val=[%s%s]",
+	TP_printk("map id=%d type=%s ufd=%d key=[%s%s] val=[%s%s]",
+		  __entry->id,
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd,
 		  __print_hex(__get_dynamic_array(key), __entry->key_len),
@@ -295,6 +304,7 @@ TRACE_EVENT(bpf_map_delete_elem,
 		__dynamic_array(u8, key, map->key_size)
 		__field(bool, key_trunc)
 		__field(int, ufd)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -303,9 +313,11 @@ TRACE_EVENT(bpf_map_delete_elem,
 		__entry->key_len   = min(map->key_size, 16U);
 		__entry->key_trunc = map->key_size != __entry->key_len;
 		__entry->ufd       = ufd;
+		__entry->id        = map->id;
 	),
 
-	TP_printk("map type=%s ufd=%d key=[%s%s]",
+	TP_printk("map id=%d type=%s ufd=%d key=[%s%s]",
+		  __entry->id,
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd,
 		  __print_hex(__get_dynamic_array(key), __entry->key_len),
@@ -327,6 +339,7 @@ TRACE_EVENT(bpf_map_next_key,
 		__field(bool, key_trunc)
 		__field(bool, key_null)
 		__field(int, ufd)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -338,9 +351,11 @@ TRACE_EVENT(bpf_map_next_key,
 		__entry->key_len   = min(map->key_size, 16U);
 		__entry->key_trunc = map->key_size != __entry->key_len;
 		__entry->ufd       = ufd;
+		__entry->id        = map->id;
 	),
 
-	TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
+	TP_printk("map id=%d type=%s ufd=%d key=[%s%s] next=[%s%s]",
+		  __entry->id,
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd,
 		  __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [bpf-next PATCH 2/3] bpf: add id to prog tracepoint
  2018-04-18 15:30 [bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints Sebastiano Miano
  2018-04-18 15:30 ` [bpf-next PATCH 1/3] bpf: add id to map tracepoint Sebastiano Miano
@ 2018-04-18 15:30 ` Sebastiano Miano
  2018-04-19  9:10   ` Jesper Dangaard Brouer
  2018-04-18 15:30 ` [bpf-next PATCH 3/3] bpf: add sample program to trace map events Sebastiano Miano
  2 siblings, 1 reply; 12+ messages in thread
From: Sebastiano Miano @ 2018-04-18 15:30 UTC (permalink / raw)
  To: netdev, ast, daniel; +Cc: mingo, rostedt, brouer, fulvio.risso

This patch adds the prog id to the bpf tracepoints
that can be used when monitoring or inspecting prog
related functions.

Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>
Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/trace/events/bpf.h |   15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index d7c9726..4ec19c5 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -65,16 +65,19 @@ DECLARE_EVENT_CLASS(bpf_prog_event,
 	TP_STRUCT__entry(
 		__array(u8, prog_tag, 8)
 		__field(u32, type)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
 		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag));
 		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
 		__entry->type = prg->type;
+		__entry->id   = prg->aux->id;
 	),
 
-	TP_printk("prog=%s type=%s",
+	TP_printk("prog=%s id=%u type=%s",
 		  __print_hex_str(__entry->prog_tag, 8),
+		  __entry->id,
 		  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB))
 );
 
@@ -102,6 +105,7 @@ TRACE_EVENT(bpf_prog_load,
 		__array(u8, prog_tag, 8)
 		__field(u32, type)
 		__field(int, ufd)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -109,10 +113,12 @@ TRACE_EVENT(bpf_prog_load,
 		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
 		__entry->type = prg->type;
 		__entry->ufd  = ufd;
+		__entry->id   = prg->aux->id;
 	),
 
-	TP_printk("prog=%s type=%s ufd=%d",
+	TP_printk("prog=%s id=%u type=%s ufd=%d",
 		  __print_hex_str(__entry->prog_tag, 8),
+		  __entry->id,
 		  __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB),
 		  __entry->ufd)
 );
@@ -161,6 +167,7 @@ DECLARE_EVENT_CLASS(bpf_obj_prog,
 		__array(u8, prog_tag, 8)
 		__field(int, ufd)
 		__string(path, pname->name)
+		__field(u32, id)
 	),
 
 	TP_fast_assign(
@@ -168,10 +175,12 @@ DECLARE_EVENT_CLASS(bpf_obj_prog,
 		memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag));
 		__assign_str(path, pname->name);
 		__entry->ufd = ufd;
+		__entry->id  = prg->aux->id;
 	),
 
-	TP_printk("prog=%s path=%s ufd=%d",
+	TP_printk("prog=%s id=%u path=%s ufd=%d",
 		  __print_hex_str(__entry->prog_tag, 8),
+		  __entry->id,
 		  __get_str(path), __entry->ufd)
 );
 

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-18 15:30 [bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints Sebastiano Miano
  2018-04-18 15:30 ` [bpf-next PATCH 1/3] bpf: add id to map tracepoint Sebastiano Miano
  2018-04-18 15:30 ` [bpf-next PATCH 2/3] bpf: add id to prog tracepoint Sebastiano Miano
@ 2018-04-18 15:30 ` Sebastiano Miano
  2018-04-19  9:20   ` Jesper Dangaard Brouer
  2018-04-20  0:27   ` Alexei Starovoitov
  2 siblings, 2 replies; 12+ messages in thread
From: Sebastiano Miano @ 2018-04-18 15:30 UTC (permalink / raw)
  To: netdev, ast, daniel; +Cc: mingo, rostedt, brouer, fulvio.risso

This patch adds a sample program, called trace_map_events,
that shows how to capture map events and filter them based on
the map id.

The program accepts a list of map IDs, via the -i command line
option, and filters all the map events related to those IDs (i.e.,
map_create/update/lookup/next_key).
If no IDs are specified, all map events are listed and no filtering
is performed.

Sample usage:

 # trace_map_events -i <map_id1> -i <map_id2> -i <map_id3> ...

Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>
---
 samples/bpf/Makefile                |    4 
 samples/bpf/trace_map_events_kern.c |  225 +++++++++++++++++++++++++
 samples/bpf/trace_map_events_user.c |  314 +++++++++++++++++++++++++++++++++++
 3 files changed, 543 insertions(+)
 create mode 100644 samples/bpf/trace_map_events_kern.c
 create mode 100644 samples/bpf/trace_map_events_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6ed..a7d52b6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -15,6 +15,7 @@ hostprogs-y += tracex6
 hostprogs-y += tracex7
 hostprogs-y += test_probe_write_user
 hostprogs-y += trace_output
+hostprogs-y += trace_map_events
 hostprogs-y += lathist
 hostprogs-y += offwaketime
 hostprogs-y += spintest
@@ -65,6 +66,7 @@ tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
 load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
 trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
+trace_map_events-objs := bpf_load.o $(LIBBPF) trace_map_events_user.o
 lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
 offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
 spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
@@ -111,6 +113,7 @@ always += tracex7_kern.o
 always += sock_flags_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
+always += trace_map_events_kern.o
 always += tcbpf1_kern.o
 always += tcbpf2_kern.o
 always += tc_l2_redirect_kern.o
@@ -171,6 +174,7 @@ HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
 HOSTLOADLIBES_load_sock_ops += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_trace_map_events += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
 HOSTLOADLIBES_offwaketime += -lelf
 HOSTLOADLIBES_spintest += -lelf
diff --git a/samples/bpf/trace_map_events_kern.c b/samples/bpf/trace_map_events_kern.c
new file mode 100644
index 0000000..f887b5b
--- /dev/null
+++ b/samples/bpf/trace_map_events_kern.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2018 Politecnico di Torino, Italy
+ *
+ * Author: Sebastiano Miano <sebastiano.miano@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+enum map_event_type {
+	MAP_CREATE = 0,
+	MAP_UPDATE = 1,
+	MAP_LOOKUP = 2,
+	MAP_NEXT_KEY = 3
+};
+
+struct map_event_data {
+	u32 map_id;
+	enum map_event_type evnt_type;
+	u32 map_type;
+};
+
+struct bpf_map_def SEC("maps") map_event_trace = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = 64,
+};
+
+struct bpf_map_def SEC("maps") filtered_ids = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u32),
+	.max_entries = 64,
+};
+
+struct bpf_map_def SEC("maps") filter_events = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(bool),
+	.max_entries = 1,
+};
+
+/*
+ * Tracepoint format: /sys/kernel/debug/tracing/events/bpf/bpf_map_create/format
+ * Code in:                kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_create_ctx {
+	u64 pad;		// First 8 bytes are not accessible by bpf code
+	u32 type;		// offset:8;	size:4;	signed:0;
+	u32 size_key;		// offset:12;	size:4;	signed:0;
+	u32 size_value;		// offset:16;	size:4;	signed:0;
+	u32 max_entries;	// offset:20;	size:4;	signed:0;
+	u32 flags;		// offset:24;	size:4;	signed:0;
+	int ufd;		// offset:28;	size:4;	signed:1;
+	u32 id;			// offset:32;	size:4;	signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_create")
+int trace_bpf_map_create(struct bpf_map_create_ctx *ctx)
+{
+	struct map_event_data data;
+	int cpu = bpf_get_smp_processor_id();
+	bool *filter;
+	u32 key = 0, map_id = ctx->id;
+
+	filter = bpf_map_lookup_elem(&filter_events, &key);
+	if (!filter)
+		return 1;
+
+	if (!*filter)
+		goto send_event;
+
+	/*
+	 * If the map_id is not in the list of filtered
+	 * ids we immediately return
+	 */
+	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+		return 0;
+
+send_event:
+	data.map_id = map_id;
+	data.evnt_type = MAP_CREATE;
+	data.map_type = ctx->type;
+
+	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+	return 0;
+}
+
+/*
+ * Tracepoint:	/sys/kernel/debug/tracing/events/bpf/bpf_map_lookup_elem/format
+ * Tracepoint:	/sys/kernel/debug/tracing/events/bpf/bpf_map_update_elem/format
+ * Code in:          kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_keyval_ctx {
+	u64 pad;		// First 8 bytes are not accessible by bpf code
+	u32 type;		// offset:8;	size:4;	signed:0;
+	u32 key_len;		// offset:12;	size:4;	signed:0;
+	u32 key;		// offset:16;	size:4;	signed:0;
+	bool key_trunc;		// offset:20;	size:1;	signed:0;
+	u32 val_len;		// offset:24;	size:4;	signed:0;
+	u32 val;		// offset:28;	size:4;	signed:0;
+	bool val_trunc;		// offset:32;	size:1;	signed:0;
+	int ufd;		// offset:36;	size:4;	signed:1;
+	u32 id;			// offset:40;	size:4;	signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_lookup_elem")
+int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
+{
+	struct map_event_data data;
+	int cpu = bpf_get_smp_processor_id();
+	bool *filter;
+	u32 key = 0, map_id = ctx->id;
+
+	filter = bpf_map_lookup_elem(&filter_events, &key);
+	if (!filter)
+		return 1;
+
+	if (!*filter)
+		goto send_event;
+
+	/*
+	 * If the map_id is not in the list of filtered
+	 * ids we immediately return
+	 */
+	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+		return 0;
+
+send_event:
+	data.map_id = map_id;
+	data.evnt_type = MAP_LOOKUP;
+	data.map_type = ctx->type;
+
+	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+	return 0;
+}
+
+SEC("tracepoint/bpf/bpf_map_update_elem")
+int trace_bpf_map_update_elem(struct bpf_map_keyval_ctx *ctx)
+{
+	struct map_event_data data;
+	int cpu = bpf_get_smp_processor_id();
+	bool *filter;
+	u32 key = 0, map_id = ctx->id;
+
+	filter = bpf_map_lookup_elem(&filter_events, &key);
+	if (!filter)
+		return 1;
+
+	if (!*filter)
+		goto send_event;
+
+	/*
+	 * If the map_id is not in the list of filtered
+	 * ids we immediately return
+	 */
+	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+		return 0;
+
+send_event:
+	data.map_id = map_id;
+	data.evnt_type = MAP_UPDATE;
+	data.map_type = ctx->type;
+
+	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+	return 0;
+}
+
+/*
+ * Tracepoint:	/sys/kernel/debug/tracing/events/bpf/bpf_map_next_key/format
+ * Code in:          kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_next_key_ctx {
+	u64 pad;		// First 8 bytes are not accessible by bpf code
+	u32 type;		// offset:8;	size:4;	signed:0;
+	u32 key_len;		// offset:12;	size:4;	signed:0;
+	u32 key;		// offset:16;	size:4;	signed:0;
+	u32 nxt;		// offset:20;	size:4;	signed:0;
+	bool key_trunc;		// offset:24;	size:1;	signed:0;
+	bool key_null;		// offset:25;	size:1;	signed:0;
+	int ufd;		// offset:28;	size:4;	signed:1;
+	u32 id;			// offset:32;	size:4;	signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_next_key")
+int trace_bpf_map_next_key(struct bpf_map_next_key_ctx *ctx)
+{
+	struct map_event_data data;
+	int cpu = bpf_get_smp_processor_id();
+	bool *filter;
+	u32 key = 0, map_id = ctx->id;
+
+	filter = bpf_map_lookup_elem(&filter_events, &key);
+	if (!filter)
+		return 1;
+
+	if (!*filter)
+		goto send_event;
+
+	/*
+	 * If the map_id is not in the list of filtered
+	 * ids we immediately return
+	 */
+	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+		return 0;
+
+send_event:
+	data.map_id = map_id;
+	data.evnt_type = MAP_NEXT_KEY;
+	data.map_type = ctx->type;
+
+	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/trace_map_events_user.c b/samples/bpf/trace_map_events_user.c
new file mode 100644
index 0000000..bc7447e
--- /dev/null
+++ b/samples/bpf/trace_map_events_user.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2018 Politecnico di Torino, Italy
+ *
+ * Author: Sebastiano Miano <sebastiano.miano@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+static const char *__desc__ =
+"Sample program to trace map related events\n"
+"The -i option allows to set the id(s) of the map you are interested in.\n"
+"If no ID is specified, all map events are listed.\n";
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/resource.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include <getopt.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+
+#define MAX_FILTERED_IDS 64
+
+static int *perf_fd;
+
+int epoll_fd;
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page **readers;
+
+typedef void (*event_cb)(void *data, int size);
+
+enum map_event_type {
+	MAP_CREATE = 0,
+	MAP_UPDATE = 1,
+	MAP_LOOKUP = 2,
+	MAP_NEXT_KEY = 3
+};
+
+static void usage(char *argv[])
+{
+	printf("\nDESCRIPTION:\n%s", __desc__);
+	printf("\n");
+	printf(" Usage: %s [-i map_id1] [-i map_id2] ...\n", argv[0]);
+	printf("\n");
+}
+
+static int perf_event_mmap(int fd, int cpu)
+{
+	void *base;
+	int mmap_size;
+
+	page_size = getpagesize();
+	mmap_size = page_size * (page_cnt + 1);
+
+	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		printf("mmap err\n");
+		return -1;
+	}
+
+	readers[cpu] = base;
+	return 0;
+}
+
+static void init_bpf_perf_event_on_cpu(int cpu)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+	int key = cpu;
+
+	perf_fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+
+	assert(perf_fd[cpu] >= 0);
+	assert(perf_event_mmap(perf_fd[cpu], cpu) >= 0);
+	assert(ioctl(perf_fd[cpu], PERF_EVENT_IOC_ENABLE, 0) >= 0);
+	assert(bpf_map_update_elem(map_fd[0], &key, &perf_fd[cpu], 0) == 0);
+
+	struct epoll_event e = { .events = EPOLLIN, .data.u32 = cpu };
+
+	assert(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, perf_fd[cpu], &e) == 0);
+}
+
+static int perf_event_poll(int fd, int num_cpus, struct epoll_event *events)
+{
+	return epoll_wait(fd, events, num_cpus, -1);
+}
+
+struct perf_event_sample {
+	struct perf_event_header header;
+	__u32 size;
+	char data[];
+};
+
+static void perf_event_read(event_cb fn, __u32 index)
+{
+	__u64 data_tail = readers[index]->data_tail;
+	__u64 data_head = readers[index]->data_head;
+	__u64 buffer_size = page_cnt * page_size;
+	void *base, *begin, *end;
+	char buf[256];
+
+	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+	if (data_head == data_tail)
+		return;
+
+	base = ((char *)readers[index]) + page_size;
+
+	begin = base + data_tail % buffer_size;
+	end = base + data_head % buffer_size;
+
+	while (begin != end) {
+		struct perf_event_sample *e;
+
+		e = begin;
+		if (begin + e->header.size > base + buffer_size) {
+			long len = base + buffer_size - begin;
+
+			assert(len < e->header.size);
+			memcpy(buf, begin, len);
+			memcpy(buf + len, base, e->header.size - len);
+			e = (void *) buf;
+			begin = base + e->header.size - len;
+		} else if (begin + e->header.size == base + buffer_size) {
+			begin = base;
+		} else {
+			begin += e->header.size;
+		}
+
+		if (e->header.type == PERF_RECORD_SAMPLE) {
+			fn(e->data, e->size);
+		} else if (e->header.type == PERF_RECORD_LOST) {
+			struct {
+				struct perf_event_header header;
+				__u64 id;
+				__u64 lost;
+			} *lost = (void *) e;
+			printf("lost %lld events\n", lost->lost);
+		} else {
+			printf("unknown event type=%d size=%d\n",
+			       e->header.type, e->header.size);
+		}
+	}
+
+	__sync_synchronize(); /* smp_mb() */
+	readers[index]->data_tail = data_head;
+}
+
+static const char *get_event_type(enum map_event_type event)
+{
+	switch (event) {
+	case MAP_CREATE:
+		return "CREATE";
+	case MAP_LOOKUP:
+		return "LOOKUP";
+	case MAP_UPDATE:
+		return "UPDATE";
+	case MAP_NEXT_KEY:
+		return "NEXT_KEY";
+	}
+
+	return "UNKNOWN";
+}
+
+
+static void map_event_callback(void *data, int size)
+{
+	struct {
+		__u32 map_id;
+		enum map_event_type event_type;
+		__u32 map_type;
+	} *e = data;
+
+	printf("%s event for map id: %d and type: %d\n",
+	       get_event_type(e->event_type), e->map_id, e->map_type);
+}
+
+static bool init_filtered_ids_map(int num_ids, int *filtered_ids)
+{
+	int i, key, value;
+	bool filtering = false;
+	/*
+	 * I am going to put the IDs in the map. Only event related to those IDs
+	 * will be shown. The key indicates the ID of the map while the value
+	 * is not used and then is set to 0.
+	 */
+	for (i = 0; i < num_ids; i++) {
+		key = filtered_ids[i];
+		value = 0;
+		if (bpf_map_update_elem(map_fd[1], &key, &value, 0) != 0) {
+			fprintf(stderr,
+			"ERR: bpf_map_update_elem failed key:0x%X\n", key);
+		return false;
+		}
+	}
+
+	if (num_ids > 0)
+		filtering = true;
+
+	key = 0;
+	assert(bpf_map_update_elem(map_fd[2], &key, &filtering, BPF_ANY) == 0);
+	return true;
+}
+
+static bool init_perf_buffer_data_structures(int nr_cpus)
+{
+	int i;
+
+	perf_fd = malloc(sizeof(int) * nr_cpus);
+	assert(perf_fd);
+	readers = malloc(sizeof(*readers) * nr_cpus);
+	assert(readers);
+
+	epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+
+	for (i = 0; i < nr_cpus; i++) {
+		printf("Init bpf_perf_event for cpu:%d\n", i);
+		init_bpf_perf_event_on_cpu(i);
+	}
+
+	return true;
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	int i, cnt, opt, ret = EXIT_SUCCESS;
+	char bpf_obj_file[256];
+	int num_ids = 0, nr_cpus = bpf_num_possible_cpus();
+	int filtered_ids[MAX_FILTERED_IDS];
+
+	snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+
+	/* Parse commands line args */
+	while ((opt = getopt(argc, argv, "hi:")) != -1) {
+		switch (opt) {
+		case 'i':
+			if (num_ids == MAX_FILTERED_IDS) {
+				printf("Reached maximum number of IDs");
+				return EXIT_FAILURE;
+			}
+			i = atoi(optarg);
+			if (!i)
+				printf("ERROR - Invalid id %s", optarg);
+			else
+				filtered_ids[num_ids++] = i;
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return EXIT_FAILURE;
+	}
+
+	if (load_bpf_file(bpf_obj_file)) {
+		printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
+		return EXIT_FAILURE;
+	}
+
+	if (!prog_fd[0]) {
+		printf("ERROR - load_bpf_file: %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	init_filtered_ids_map(num_ids, filtered_ids);
+	init_perf_buffer_data_structures(nr_cpus);
+
+	struct epoll_event *events = calloc(nr_cpus, sizeof(*events));
+
+	while (true) {
+		printf("Waiting for map events...\n");
+		cnt = perf_event_poll(epoll_fd, nr_cpus, events);
+		for (i = 0; i < cnt; i++)
+			perf_event_read(map_event_callback, events[i].data.u32);
+	}
+
+	free(perf_fd);
+	free(readers);
+	free(events);
+
+	return ret;
+}

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 1/3] bpf: add id to map tracepoint
  2018-04-18 15:30 ` [bpf-next PATCH 1/3] bpf: add id to map tracepoint Sebastiano Miano
@ 2018-04-19  9:08   ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 12+ messages in thread
From: Jesper Dangaard Brouer @ 2018-04-19  9:08 UTC (permalink / raw)
  To: Sebastiano Miano
  Cc: netdev, ast, daniel, mingo, rostedt, fulvio.risso, brouer

On Wed, 18 Apr 2018 17:30:48 +0200
Sebastiano Miano <sebastiano.miano@polito.it> wrote:

> This patch adds the map id to the bpf tracepoints
> that can be used when monitoring or inspecting map
> related functions.
> 
> Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>
> Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Thanks you for doing this.  I've needed this before when
troubleshooting my XDP programs (specifically xdp_ddos01_blacklist[1]).

E.g. when I want to verify that my tools are doing the right thing, I
can now find the XDP prog id via 'ip link' or bpftool, and list the map
IDs used by the prog tool (via bpftool), and now use perf to record map
changes, which now have the needed IDs I can filter on.  Before, I
could not tell the difference if the program was updating the correct
map (which were a mistake I ran into).

Perf record even support supplying filters on the cmdline, like:

 perf record -e bpf:bpf_map_* -a --filter 'id == 2 || id == 1' sleep 100

And yes, doing filtering this way is slow, compared to doing it via a
bpf_prog inside the kernel, which Sebastiano already provide a sample
on howto do.  But I just needed a way to find the bug in my program,
not any high speed usage.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

[1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_ddos01_blacklist_cmdline.c

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: add id to prog tracepoint
  2018-04-18 15:30 ` [bpf-next PATCH 2/3] bpf: add id to prog tracepoint Sebastiano Miano
@ 2018-04-19  9:10   ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 12+ messages in thread
From: Jesper Dangaard Brouer @ 2018-04-19  9:10 UTC (permalink / raw)
  To: Sebastiano Miano
  Cc: netdev, ast, daniel, mingo, rostedt, fulvio.risso, brouer

On Wed, 18 Apr 2018 17:30:53 +0200
Sebastiano Miano <sebastiano.miano@polito.it> wrote:

> This patch adds the prog id to the bpf tracepoints
> that can be used when monitoring or inspecting prog
> related functions.
> 
> Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>
> Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-18 15:30 ` [bpf-next PATCH 3/3] bpf: add sample program to trace map events Sebastiano Miano
@ 2018-04-19  9:20   ` Jesper Dangaard Brouer
  2018-04-20  0:27   ` Alexei Starovoitov
  1 sibling, 0 replies; 12+ messages in thread
From: Jesper Dangaard Brouer @ 2018-04-19  9:20 UTC (permalink / raw)
  To: Sebastiano Miano
  Cc: netdev, ast, daniel, mingo, rostedt, fulvio.risso, brouer

On Wed, 18 Apr 2018 17:30:59 +0200
Sebastiano Miano <sebastiano.miano@polito.it> wrote:

> This patch adds a sample program, called trace_map_events,
> that shows how to capture map events and filter them based on
> the map id.
> 
> The program accepts a list of map IDs, via the -i command line
> option, and filters all the map events related to those IDs (i.e.,
> map_create/update/lookup/next_key).
> If no IDs are specified, all map events are listed and no filtering
> is performed.
> 
> Sample usage:
> 
>  # trace_map_events -i <map_id1> -i <map_id2> -i <map_id3> ...
> 
> Signed-off-by: Sebastiano Miano <sebastiano.miano@polito.it>

Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

I have tested it works:

$ sudo ./trace_map_events -i 2
Init bpf_perf_event for cpu:0
Init bpf_perf_event for cpu:1
Init bpf_perf_event for cpu:2
Init bpf_perf_event for cpu:3
Init bpf_perf_event for cpu:4
Init bpf_perf_event for cpu:5
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-18 15:30 ` [bpf-next PATCH 3/3] bpf: add sample program to trace map events Sebastiano Miano
  2018-04-19  9:20   ` Jesper Dangaard Brouer
@ 2018-04-20  0:27   ` Alexei Starovoitov
  2018-04-20  9:47     ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2018-04-20  0:27 UTC (permalink / raw)
  To: Sebastiano Miano
  Cc: netdev, ast, daniel, mingo, rostedt, brouer, fulvio.risso,
	David S. Miller

On Wed, Apr 18, 2018 at 05:30:59PM +0200, Sebastiano Miano wrote:
> This patch adds a sample program, called trace_map_events,
> that shows how to capture map events and filter them based on
> the map id.
...
> +struct bpf_map_keyval_ctx {
> +	u64 pad;		// First 8 bytes are not accessible by bpf code
> +	u32 type;		// offset:8;	size:4;	signed:0;
> +	u32 key_len;		// offset:12;	size:4;	signed:0;
> +	u32 key;		// offset:16;	size:4;	signed:0;
> +	bool key_trunc;		// offset:20;	size:1;	signed:0;
> +	u32 val_len;		// offset:24;	size:4;	signed:0;
> +	u32 val;		// offset:28;	size:4;	signed:0;
> +	bool val_trunc;		// offset:32;	size:1;	signed:0;
> +	int ufd;		// offset:36;	size:4;	signed:1;
> +	u32 id;			// offset:40;	size:4;	signed:0;
> +};
> +
> +SEC("tracepoint/bpf/bpf_map_lookup_elem")
> +int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
> +{
> +	struct map_event_data data;
> +	int cpu = bpf_get_smp_processor_id();
> +	bool *filter;
> +	u32 key = 0, map_id = ctx->id;
> +
> +	filter = bpf_map_lookup_elem(&filter_events, &key);
> +	if (!filter)
> +		return 1;
> +
> +	if (!*filter)
> +		goto send_event;
> +
> +	/*
> +	 * If the map_id is not in the list of filtered
> +	 * ids we immediately return
> +	 */
> +	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
> +		return 0;
> +
> +send_event:
> +	data.map_id = map_id;
> +	data.evnt_type = MAP_LOOKUP;
> +	data.map_type = ctx->type;
> +
> +	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
> +	return 0;
> +}

looks like the purpose of the series is to create map notify mechanism
so some external user space daemon can snoop all bpf map operations
that all other processes and bpf programs are doing.
I think it would be way better to create a proper mechanism for that
with permissions.

tracepoints in the bpf core were introduced as introspection mechanism
for debugging. Right now we have better ways to do introspection
with ids, queries, etc that bpftool is using, so original purpose of
those tracepoints is gone and they actually rot.
Let's not repurpose them into this map notify logic.
I don't want tracepoints in the bpf core to become a stable interface
it will stiffen the development.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-20  0:27   ` Alexei Starovoitov
@ 2018-04-20  9:47     ` Jesper Dangaard Brouer
  2018-04-23 14:08       ` Sebastiano Miano
  0 siblings, 1 reply; 12+ messages in thread
From: Jesper Dangaard Brouer @ 2018-04-20  9:47 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Sebastiano Miano, netdev, ast, daniel, mingo, rostedt,
	fulvio.risso, David S. Miller, brouer

On Thu, 19 Apr 2018 17:27:37 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Wed, Apr 18, 2018 at 05:30:59PM +0200, Sebastiano Miano wrote:
> > This patch adds a sample program, called trace_map_events,
> > that shows how to capture map events and filter them based on
> > the map id.  
> ...
> > +struct bpf_map_keyval_ctx {
> > +	u64 pad;		// First 8 bytes are not accessible by bpf code
> > +	u32 type;		// offset:8;	size:4;	signed:0;
> > +	u32 key_len;		// offset:12;	size:4;	signed:0;
> > +	u32 key;		// offset:16;	size:4;	signed:0;
> > +	bool key_trunc;		// offset:20;	size:1;	signed:0;
> > +	u32 val_len;		// offset:24;	size:4;	signed:0;
> > +	u32 val;		// offset:28;	size:4;	signed:0;
> > +	bool val_trunc;		// offset:32;	size:1;	signed:0;
> > +	int ufd;		// offset:36;	size:4;	signed:1;
> > +	u32 id;			// offset:40;	size:4;	signed:0;
> > +};
> > +
> > +SEC("tracepoint/bpf/bpf_map_lookup_elem")
> > +int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
> > +{
> > +	struct map_event_data data;
> > +	int cpu = bpf_get_smp_processor_id();
> > +	bool *filter;
> > +	u32 key = 0, map_id = ctx->id;
> > +
> > +	filter = bpf_map_lookup_elem(&filter_events, &key);
> > +	if (!filter)
> > +		return 1;
> > +
> > +	if (!*filter)
> > +		goto send_event;
> > +
> > +	/*
> > +	 * If the map_id is not in the list of filtered
> > +	 * ids we immediately return
> > +	 */
> > +	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
> > +		return 0;
> > +
> > +send_event:
> > +	data.map_id = map_id;
> > +	data.evnt_type = MAP_LOOKUP;
> > +	data.map_type = ctx->type;
> > +
> > +	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
> > +	return 0;
> > +}  
> 
> looks like the purpose of the series is to create map notify mechanism
> so some external user space daemon can snoop all bpf map operations
> that all other processes and bpf programs are doing.
> I think it would be way better to create a proper mechanism for that
> with permissions.
> 
> tracepoints in the bpf core were introduced as introspection mechanism
> for debugging. Right now we have better ways to do introspection
> with ids, queries, etc that bpftool is using, so original purpose of
> those tracepoints is gone and they actually rot.
> Let's not repurpose them into this map notify logic.
> I don't want tracepoints in the bpf core to become a stable interface
> it will stiffen the development.

Well, I need it exactly for introspection and debugging, and just need
the missing ID (as it was introduced later).

Can we just drop the sample program then? I would likely not use the
sample program, because it is missing the PID+comm-name.  

For my use, I can simply use perf record to debug what programs are
changing the map ID:

  perf record -e bpf:bpf_map_* -a --filter 'id == 2' 

This should be a fairly common troubleshooting step.  I want to
figure-out if anybody else (another userspace program) is changing my
map. This can easily be caused by two userspace control programs
running at the same time. Sysadm/scripts made a mistake and started two
instances. Without the map ID, I cannot know what map perf is talking
about...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-20  9:47     ` Jesper Dangaard Brouer
@ 2018-04-23 14:08       ` Sebastiano Miano
  2018-04-23 20:08         ` Alexei Starovoitov
  0 siblings, 1 reply; 12+ messages in thread
From: Sebastiano Miano @ 2018-04-23 14:08 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Alexei Starovoitov
  Cc: netdev, ast, daniel, mingo, rostedt, fulvio.risso, David S. Miller

On 20/04/2018 11:47, Jesper Dangaard Brouer wrote:
> On Thu, 19 Apr 2018 17:27:37 -0700
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
>> On Wed, Apr 18, 2018 at 05:30:59PM +0200, Sebastiano Miano wrote:
>>> This patch adds a sample program, called trace_map_events,
>>> that shows how to capture map events and filter them based on
>>> the map id.
>> ...
>>> +struct bpf_map_keyval_ctx {
>>> +	u64 pad;		// First 8 bytes are not accessible by bpf code
>>> +	u32 type;		// offset:8;	size:4;	signed:0;
>>> +	u32 key_len;		// offset:12;	size:4;	signed:0;
>>> +	u32 key;		// offset:16;	size:4;	signed:0;
>>> +	bool key_trunc;		// offset:20;	size:1;	signed:0;
>>> +	u32 val_len;		// offset:24;	size:4;	signed:0;
>>> +	u32 val;		// offset:28;	size:4;	signed:0;
>>> +	bool val_trunc;		// offset:32;	size:1;	signed:0;
>>> +	int ufd;		// offset:36;	size:4;	signed:1;
>>> +	u32 id;			// offset:40;	size:4;	signed:0;
>>> +};
>>> +
>>> +SEC("tracepoint/bpf/bpf_map_lookup_elem")
>>> +int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
>>> +{
>>> +	struct map_event_data data;
>>> +	int cpu = bpf_get_smp_processor_id();
>>> +	bool *filter;
>>> +	u32 key = 0, map_id = ctx->id;
>>> +
>>> +	filter = bpf_map_lookup_elem(&filter_events, &key);
>>> +	if (!filter)
>>> +		return 1;
>>> +
>>> +	if (!*filter)
>>> +		goto send_event;
>>> +
>>> +	/*
>>> +	 * If the map_id is not in the list of filtered
>>> +	 * ids we immediately return
>>> +	 */
>>> +	if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
>>> +		return 0;
>>> +
>>> +send_event:
>>> +	data.map_id = map_id;
>>> +	data.evnt_type = MAP_LOOKUP;
>>> +	data.map_type = ctx->type;
>>> +
>>> +	bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
>>> +	return 0;
>>> +}
>>
>> looks like the purpose of the series is to create map notify mechanism
>> so some external user space daemon can snoop all bpf map operations
>> that all other processes and bpf programs are doing.
>> I think it would be way better to create a proper mechanism for that
>> with permissions.
>>
>> tracepoints in the bpf core were introduced as introspection mechanism
>> for debugging. Right now we have better ways to do introspection
>> with ids, queries, etc that bpftool is using, so original purpose of
>> those tracepoints is gone and they actually rot.
>> Let's not repurpose them into this map notify logic.
>> I don't want tracepoints in the bpf core to become a stable interface
>> it will stiffen the development.
> 
> Well, I need it exactly for introspection and debugging, and just need
> the missing ID (as it was introduced later).
> 
> Can we just drop the sample program then? I would likely not use the
> sample program, because it is missing the PID+comm-name.

I agree with Jesper. We could drop this sample program but, do you think 
it is worth spending some time to create another sample program that 
shows the correct or suggested way to do it?

I believe it would be useful to have a program for this since I found it 
difficult to understand how to do it with the current 
documentation/samples and it seems I was not the only one hitting this 
problem.

> 
> For my use, I can simply use perf record to debug what programs are
> changing the map ID:
> 
>    perf record -e bpf:bpf_map_* -a --filter 'id == 2'
> 
> This should be a fairly common troubleshooting step.  I want to
> figure-out if anybody else (another userspace program) is changing my
> map. This can easily be caused by two userspace control programs
> running at the same time. Sysadm/scripts made a mistake and started two
> instances. Without the map ID, I cannot know what map perf is talking
> about...
> 

That's in fact the real use case for the first two patches. Since bpf 
tracepoints are still a rather common (and easy to use) troubleshooting 
and monitoring tool why shouldn't we "enhance" their support with the 
newly added map/prog IDs?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-23 14:08       ` Sebastiano Miano
@ 2018-04-23 20:08         ` Alexei Starovoitov
  2018-04-23 20:22           ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2018-04-23 20:08 UTC (permalink / raw)
  To: Sebastiano Miano
  Cc: Jesper Dangaard Brouer, netdev, ast, daniel, mingo, rostedt,
	fulvio.risso, David S. Miller

On Mon, Apr 23, 2018 at 04:08:36PM +0200, Sebastiano Miano wrote:
> 
> That's in fact the real use case for the first two patches. Since bpf
> tracepoints are still a rather common (and easy to use) troubleshooting and
> monitoring tool why shouldn't we "enhance" their support with the newly
> added map/prog IDs?

because these tracepoints can be abused in the way that this patch demonstrated.
Whether to keep this patch in the series or not is irrelevant.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
  2018-04-23 20:08         ` Alexei Starovoitov
@ 2018-04-23 20:22           ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 12+ messages in thread
From: Jesper Dangaard Brouer @ 2018-04-23 20:22 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Sebastiano Miano, netdev, ast, daniel, mingo, rostedt,
	fulvio.risso, David S. Miller, brouer

On Mon, 23 Apr 2018 14:08:02 -0600
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Mon, Apr 23, 2018 at 04:08:36PM +0200, Sebastiano Miano wrote:
> > 
> > That's in fact the real use case for the first two patches. Since bpf
> > tracepoints are still a rather common (and easy to use) troubleshooting and
> > monitoring tool why shouldn't we "enhance" their support with the newly
> > added map/prog IDs?  
> 
> because these tracepoints can be abused in the way that this patch demonstrated.
> Whether to keep this patch in the series or not is irrelevant.

I don't understand your abuse use-case, can you explain what you mean?

You do realize that these tracepoints can _only_ monitor the userspace
map activity (not kernel map changes) ... and we _do_ need a way to
debug this (and without the map_id I can tell which map).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-04-23 20:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-18 15:30 [bpf-next PATCH 0/3] Add ID to bpf_map/prog tracepoints Sebastiano Miano
2018-04-18 15:30 ` [bpf-next PATCH 1/3] bpf: add id to map tracepoint Sebastiano Miano
2018-04-19  9:08   ` Jesper Dangaard Brouer
2018-04-18 15:30 ` [bpf-next PATCH 2/3] bpf: add id to prog tracepoint Sebastiano Miano
2018-04-19  9:10   ` Jesper Dangaard Brouer
2018-04-18 15:30 ` [bpf-next PATCH 3/3] bpf: add sample program to trace map events Sebastiano Miano
2018-04-19  9:20   ` Jesper Dangaard Brouer
2018-04-20  0:27   ` Alexei Starovoitov
2018-04-20  9:47     ` Jesper Dangaard Brouer
2018-04-23 14:08       ` Sebastiano Miano
2018-04-23 20:08         ` Alexei Starovoitov
2018-04-23 20:22           ` Jesper Dangaard Brouer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.