All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi
@ 2018-05-07 17:50 Song Liu
  2018-05-07 17:50 ` [PATCH v3 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Song Liu @ 2018-05-07 17:50 UTC (permalink / raw)
  To: netdev; +Cc: Song Liu, kernel-team, qinteng, tobin

Changes v2 -> v3:
  Improve syntax based on suggestion by Tobin C. Harding.

Changes v1 -> v2:
  1. Rename some variables to (hopefully) reduce confusion;
  2. Check irq_work status with IRQ_WORK_BUSY (instead of work->sem);
  3. In Kconfig, let BPF_SYSCALL select IRQ_WORK;
  4. Add static to DEFINE_PER_CPU();
   5. Remove pr_info() in stack_map_init().

Song Liu (2):
  bpf: enable stackmap with build_id in nmi context
  bpf: add selftest for stackmap with build_id in NMI context

 init/Kconfig                               |   1 +
 kernel/bpf/stackmap.c                      |  59 +++++++++++--
 tools/testing/selftests/bpf/test_progs.c   | 134 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/urandom_read.c |  10 ++-
 4 files changed, 196 insertions(+), 8 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
  2018-05-07 17:50 [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
@ 2018-05-07 17:50 ` Song Liu
  2018-05-07 17:50 ` [PATCH v3 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
  2018-05-14 23:09 ` [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Daniel Borkmann
  2 siblings, 0 replies; 4+ messages in thread
From: Song Liu @ 2018-05-07 17:50 UTC (permalink / raw)
  To: netdev
  Cc: Song Liu, kernel-team, qinteng, tobin, Alexei Starovoitov,
	Daniel Borkmann, Peter Zijlstra

Currently, we cannot parse build_id in nmi context because of
up_read(&current->mm->mmap_sem), this makes stackmap with build_id
less useful. This patch enables parsing build_id in nmi by putting
the up_read() call in irq_work. To avoid memory allocation in nmi
context, we use per cpu variable for the irq_work. As a result, only
one irq_work per cpu is allowed. If the irq_work is in-use, we
fallback to only report ips.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 init/Kconfig          |  1 +
 kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index f013afc..480a4f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
 	bool "Enable bpf() system call"
 	select ANON_INODES
 	select BPF
+	select IRQ_WORK
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3ba102b..b59ace0 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/elf.h>
 #include <linux/pagemap.h>
+#include <linux/irq_work.h>
 #include "percpu_freelist.h"
 
 #define STACK_CREATE_FLAG_MASK					\
@@ -32,6 +33,23 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[];
 };
 
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+	struct irq_work irq_work;
+	struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+	struct stack_map_irq_work *work;
+
+	work = container_of(entry, struct stack_map_irq_work, irq_work);
+	up_read(work->sem);
+	work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
 	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 {
 	int i;
 	struct vm_area_struct *vma;
+	bool in_nmi_ctx = in_nmi();
+	bool irq_work_busy = false;
+	struct stack_map_irq_work *work;
+
+	if (in_nmi_ctx) {
+		work = this_cpu_ptr(&up_read_work);
+		if (work->irq_work.flags & IRQ_WORK_BUSY)
+			/* cannot queue more up_read, fallback */
+			irq_work_busy = true;
+	}
 
 	/*
-	 * We cannot do up_read() in nmi context, so build_id lookup is
-	 * only supported for non-nmi events. If at some point, it is
-	 * possible to run find_vma() without taking the semaphore, we
-	 * would like to allow build_id lookup in nmi context.
+	 * We cannot do up_read() in nmi context. To do build_id lookup
+	 * in nmi context, we need to run up_read() in irq_work. We use
+	 * a percpu variable to do the irq_work. If the irq_work is
+	 * already used by another lookup, we fall back to report ips.
 	 *
 	 * Same fallback is used for kernel stack (!user) on a stackmap
 	 * with build_id.
 	 */
-	if (!user || !current || !current->mm || in_nmi() ||
+	if (!user || !current || !current->mm || irq_work_busy ||
 	    down_read_trylock(&current->mm->mmap_sem) == 0) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
 	}
-	up_read(&current->mm->mmap_sem);
+
+	if (!in_nmi_ctx) {
+		up_read(&current->mm->mmap_sem);
+	} else {
+		work->sem = &current->mm->mmap_sem;
+		irq_work_queue(&work->irq_work);
+	}
 }
 
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 };
+
+static int __init stack_map_init(void)
+{
+	int cpu;
+	struct stack_map_irq_work *work;
+
+	for_each_possible_cpu(cpu) {
+		work = per_cpu_ptr(&up_read_work, cpu);
+		init_irq_work(&work->irq_work, do_up_read);
+	}
+	return 0;
+}
+subsys_initcall(stack_map_init);
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
  2018-05-07 17:50 [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
  2018-05-07 17:50 ` [PATCH v3 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
@ 2018-05-07 17:50 ` Song Liu
  2018-05-14 23:09 ` [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Daniel Borkmann
  2 siblings, 0 replies; 4+ messages in thread
From: Song Liu @ 2018-05-07 17:50 UTC (permalink / raw)
  To: netdev; +Cc: Song Liu, kernel-team, qinteng, tobin

This new test captures stackmap with build_id with hardware event
PERF_COUNT_HW_CPU_CYCLES.

Because we only support one ips-to-build_id lookup per cpu in NMI
context, stack_amap will not be able to do the lookup in this test.
Therefore, we didn't do compare_stack_ips(), as it will alwasy fail.

urandom_read.c is extended to run configurable cycles so that it can be
caught by the perf event.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/testing/selftests/bpf/test_progs.c   | 134 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/urandom_read.c |  10 ++-
 2 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index aa336f0..5d2d8ef 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1272,6 +1272,139 @@ static void test_stacktrace_build_id(void)
 	return;
 }
 
+static void test_stacktrace_build_id_nmi(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {
+		.sample_freq = 5000,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		return;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open",
+		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	/*
+	 * We intentionally skip compare_stack_ips(). This is because we
+	 * only support one in_nmi() ips-to-build_id translation per cpu
+	 * at any time, thus stack_amap here will always fallback to
+	 * BPF_STACK_BUILD_ID_IP;
+	 */
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+}
+
 #define MAX_CNT_RAWTP	10ull
 #define MAX_STACK_RAWTP	100
 struct get_stack_trace_t {
@@ -1425,6 +1558,7 @@ int main(void)
 	test_tp_attach_query();
 	test_stacktrace_map();
 	test_stacktrace_build_id();
+	test_stacktrace_build_id_nmi();
 	test_stacktrace_map_raw_tp();
 	test_get_stack_raw_tp();
 
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 4acfdeb..9de8b7c 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -6,15 +6,21 @@
 #include <stdlib.h>
 
 #define BUF_SIZE 256
-int main(void)
+
+int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
 	int i;
 	char buf[BUF_SIZE];
+	int count = 4;
 
 	if (fd < 0)
 		return 1;
-	for (i = 0; i < 4; ++i)
+
+	if (argc == 2)
+		count = atoi(argv[1]);
+
+	for (i = 0; i < count; ++i)
 		read(fd, buf, BUF_SIZE);
 
 	close(fd);
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi
  2018-05-07 17:50 [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
  2018-05-07 17:50 ` [PATCH v3 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
  2018-05-07 17:50 ` [PATCH v3 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
@ 2018-05-14 23:09 ` Daniel Borkmann
  2 siblings, 0 replies; 4+ messages in thread
From: Daniel Borkmann @ 2018-05-14 23:09 UTC (permalink / raw)
  To: Song Liu, netdev; +Cc: kernel-team, qinteng, tobin

On 05/07/2018 07:50 PM, Song Liu wrote:
> Changes v2 -> v3:
>   Improve syntax based on suggestion by Tobin C. Harding.
> 
> Changes v1 -> v2:
>   1. Rename some variables to (hopefully) reduce confusion;
>   2. Check irq_work status with IRQ_WORK_BUSY (instead of work->sem);
>   3. In Kconfig, let BPF_SYSCALL select IRQ_WORK;
>   4. Add static to DEFINE_PER_CPU();
>    5. Remove pr_info() in stack_map_init().
> 
> Song Liu (2):
>   bpf: enable stackmap with build_id in nmi context
>   bpf: add selftest for stackmap with build_id in NMI context
> 
>  init/Kconfig                               |   1 +
>  kernel/bpf/stackmap.c                      |  59 +++++++++++--
>  tools/testing/selftests/bpf/test_progs.c   | 134 +++++++++++++++++++++++++++++
>  tools/testing/selftests/bpf/urandom_read.c |  10 ++-
>  4 files changed, 196 insertions(+), 8 deletions(-)

Applied to bpf-next, thanks Song!

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-05-14 23:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-07 17:50 [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
2018-05-07 17:50 ` [PATCH v3 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
2018-05-07 17:50 ` [PATCH v3 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
2018-05-14 23:09 ` [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Daniel Borkmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.