All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi
@ 2018-05-02 23:20 Song Liu
  2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
  2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
  0 siblings, 2 replies; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
  To: netdev; +Cc: Song Liu, kernel-team, qinteng

Changes v1 -> v2:
  1. Rename some variables to (hopefully) reduce confusion;
  2. Check irq_work status with IRQ_WORK_BUSY (instead of work->sem);
  3. In Kconfig, let BPF_SYSCALL select IRQ_WORK;
  4. Add static to DEFINE_PER_CPU();
  5. Remove pr_info() in stack_map_init().

Song Liu (2):
  bpf: enable stackmap with build_id in nmi context
  bpf: add selftest for stackmap with build_id in NMI context

 init/Kconfig                               |   1 +
 kernel/bpf/stackmap.c                      |  59 +++++++++++--
 tools/testing/selftests/bpf/test_progs.c   | 137 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/urandom_read.c |  10 ++-
 4 files changed, 199 insertions(+), 8 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
  2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
@ 2018-05-02 23:20 ` Song Liu
  2018-05-03  7:03   ` Tobin C. Harding
  2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
  1 sibling, 1 reply; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
  To: netdev
  Cc: Song Liu, kernel-team, qinteng, Alexei Starovoitov,
	Daniel Borkmann, Peter Zijlstra

Currently, we cannot parse build_id in nmi context because of
up_read(&current->mm->mmap_sem); this makes stackmap with build_id
less useful. This patch enables parsing build_id in nmi by putting
the up_read() call in irq_work. To avoid memory allocation in nmi
context, we use a per-cpu variable for the irq_work. As a result, only
one irq_work per cpu is allowed. If the irq_work is in use, we
fall back to reporting only ips.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 init/Kconfig          |  1 +
 kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index f013afc..480a4f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
 	bool "Enable bpf() system call"
 	select ANON_INODES
 	select BPF
+	select IRQ_WORK
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3ba102b..51d4aea 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,7 @@
 #include <linux/perf_event.h>
 #include <linux/elf.h>
 #include <linux/pagemap.h>
+#include <linux/irq_work.h>
 #include "percpu_freelist.h"
 
 #define STACK_CREATE_FLAG_MASK					\
@@ -32,6 +33,23 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[];
 };
 
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+	struct irq_work irq_work;
+	struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+	struct stack_map_irq_work *work = container_of(entry,
+			struct stack_map_irq_work, irq_work);
+
+	up_read(work->sem);
+	work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
 static inline bool stack_map_use_build_id(struct bpf_map *map)
 {
 	return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 {
 	int i;
 	struct vm_area_struct *vma;
+	bool in_nmi_ctx = in_nmi();
+	bool irq_work_busy = false;
+	struct stack_map_irq_work *work;
+
+	if (in_nmi_ctx) {
+		work = this_cpu_ptr(&up_read_work);
+		if (work->irq_work.flags & IRQ_WORK_BUSY)
+			/* cannot queue more up_read, fallback */
+			irq_work_busy = true;
+	}
 
 	/*
-	 * We cannot do up_read() in nmi context, so build_id lookup is
-	 * only supported for non-nmi events. If at some point, it is
-	 * possible to run find_vma() without taking the semaphore, we
-	 * would like to allow build_id lookup in nmi context.
+	 * We cannot do up_read() in nmi context. To do build_id lookup
+	 * in nmi context, we need to run up_read() in irq_work. We use
+	 * a percpu variable to do the irq_work. If the irq_work is
+	 * already used by another lookup, we fall back to report ips.
 	 *
 	 * Same fallback is used for kernel stack (!user) on a stackmap
 	 * with build_id.
 	 */
-	if (!user || !current || !current->mm || in_nmi() ||
+	if (!user || !current || !current->mm || irq_work_busy ||
 	    down_read_trylock(&current->mm->mmap_sem) == 0) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
 	}
-	up_read(&current->mm->mmap_sem);
+
+	if (!in_nmi_ctx)
+		up_read(&current->mm->mmap_sem);
+	else {
+		work->sem = &current->mm->mmap_sem;
+		irq_work_queue(&work->irq_work);
+	}
 }
 
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 };
+
+static int __init stack_map_init(void)
+{
+	int cpu;
+	struct stack_map_irq_work *work;
+
+	for_each_possible_cpu(cpu) {
+		work = per_cpu_ptr(&up_read_work, cpu);
+		init_irq_work(&work->irq_work, do_up_read);
+	}
+	return 0;
+}
+subsys_initcall(stack_map_init);
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
  2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
  2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
@ 2018-05-02 23:20 ` Song Liu
  2018-05-03  7:19   ` Tobin C. Harding
  1 sibling, 1 reply; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
  To: netdev; +Cc: Song Liu, kernel-team, qinteng

This new test captures stackmap with build_id with hardware event
PERF_COUNT_HW_CPU_CYCLES.

Because we only support one ips-to-build_id lookup per cpu in NMI
context, stack_amap will not be able to do the lookup in this test.
Therefore, we skip compare_stack_ips(), as it would always fail.

urandom_read.c is extended to run configurable cycles so that it can be
caught by the perf event.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 tools/testing/selftests/bpf/test_progs.c   | 137 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/urandom_read.c |  10 ++-
 2 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index aa336f0..00bb08c 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
 	return;
 }
 
+static void test_stacktrace_build_id_nmi(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {
+		.sample_freq = 5000,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		goto out;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open",
+		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	/*
+	 * We intentionally skip compare_stack_ips(). This is because we
+	 * only support one in_nmi() ips-to-build_id translation per cpu
+	 * at any time, thus stack_amap here will always fall back to
+	 * BPF_STACK_BUILD_ID_IP.
+	 */
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+
+out:
+	return;
+}
+
 #define MAX_CNT_RAWTP	10ull
 #define MAX_STACK_RAWTP	100
 struct get_stack_trace_t {
@@ -1425,6 +1561,7 @@ int main(void)
 	test_tp_attach_query();
 	test_stacktrace_map();
 	test_stacktrace_build_id();
+	test_stacktrace_build_id_nmi();
 	test_stacktrace_map_raw_tp();
 	test_get_stack_raw_tp();
 
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 4acfdeb..9de8b7c 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -6,15 +6,21 @@
 #include <stdlib.h>
 
 #define BUF_SIZE 256
-int main(void)
+
+int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
 	int i;
 	char buf[BUF_SIZE];
+	int count = 4;
 
 	if (fd < 0)
 		return 1;
-	for (i = 0; i < 4; ++i)
+
+	if (argc == 2)
+		count = atoi(argv[1]);
+
+	for (i = 0; i < count; ++i)
 		read(fd, buf, BUF_SIZE);
 
 	close(fd);
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
  2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
@ 2018-05-03  7:03   ` Tobin C. Harding
  0 siblings, 0 replies; 6+ messages in thread
From: Tobin C. Harding @ 2018-05-03  7:03 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, kernel-team, qinteng, Alexei Starovoitov,
	Daniel Borkmann, Peter Zijlstra

On Wed, May 02, 2018 at 04:20:29PM -0700, Song Liu wrote:
> Currently, we cannot parse build_id in nmi context because of
> up_read(&current->mm->mmap_sem), this makes stackmap with build_id
> less useful. This patch enables parsing build_id in nmi by putting
> the up_read() call in irq_work. To avoid memory allocation in nmi
> context, we use per cpu variable for the irq_work. As a result, only
> one irq_work per cpu is allowed. If the irq_work is in-use, we
> fallback to only report ips.
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  init/Kconfig          |  1 +
>  kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 54 insertions(+), 6 deletions(-)
> 
> diff --git a/init/Kconfig b/init/Kconfig
> index f013afc..480a4f2 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1391,6 +1391,7 @@ config BPF_SYSCALL
>  	bool "Enable bpf() system call"
>  	select ANON_INODES
>  	select BPF
> +	select IRQ_WORK
>  	default n
>  	help
>  	  Enable the bpf() system call that allows to manipulate eBPF
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 3ba102b..51d4aea 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -11,6 +11,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/elf.h>
>  #include <linux/pagemap.h>
> +#include <linux/irq_work.h>
>  #include "percpu_freelist.h"
>  
>  #define STACK_CREATE_FLAG_MASK					\
> @@ -32,6 +33,23 @@ struct bpf_stack_map {
>  	struct stack_map_bucket *buckets[];
>  };
>  
> +/* irq_work to run up_read() for build_id lookup in nmi context */
> +struct stack_map_irq_work {
> +	struct irq_work irq_work;
> +	struct rw_semaphore *sem;
> +};
> +
> +static void do_up_read(struct irq_work *entry)
> +{
> +	struct stack_map_irq_work *work = container_of(entry,
> +			struct stack_map_irq_work, irq_work);

perhaps:
	struct stack_map_irq_work *work;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
> +	up_read(work->sem);
> +	work->sem = NULL;
> +}
> +
> +static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
> +
>  static inline bool stack_map_use_build_id(struct bpf_map *map)
>  {
>  	return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  {
>  	int i;
>  	struct vm_area_struct *vma;
> +	bool in_nmi_ctx = in_nmi();
> +	bool irq_work_busy = false;
> +	struct stack_map_irq_work *work;
> +
> +	if (in_nmi_ctx) {
> +		work = this_cpu_ptr(&up_read_work);
> +		if (work->irq_work.flags & IRQ_WORK_BUSY)
> +			/* cannot queue more up_read, fallback */
> +			irq_work_busy = true;
> +	}
>  
>  	/*
> -	 * We cannot do up_read() in nmi context, so build_id lookup is
> -	 * only supported for non-nmi events. If at some point, it is
> -	 * possible to run find_vma() without taking the semaphore, we
> -	 * would like to allow build_id lookup in nmi context.
> +	 * We cannot do up_read() in nmi context. To do build_id lookup
> +	 * in nmi context, we need to run up_read() in irq_work. We use
> +	 * a percpu variable to do the irq_work. If the irq_work is
> +	 * already used by another lookup, we fall back to report ips.
>  	 *
>  	 * Same fallback is used for kernel stack (!user) on a stackmap
>  	 * with build_id.
>  	 */
> -	if (!user || !current || !current->mm || in_nmi() ||
> +	if (!user || !current || !current->mm || irq_work_busy ||
>  	    down_read_trylock(&current->mm->mmap_sem) == 0) {
>  		/* cannot access current->mm, fall back to ips */
>  		for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>  			- vma->vm_start;
>  		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>  	}
> -	up_read(&current->mm->mmap_sem);
> +
> +	if (!in_nmi_ctx)
> +		up_read(&current->mm->mmap_sem);
> +	else {

perhaps:
	if (!in_nmi_ctx) {
		up_read(&current->mm->mmap_sem);
	} else {


Hope this helps,
Tobin.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
  2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
@ 2018-05-03  7:19   ` Tobin C. Harding
  2018-05-04  6:41     ` Song Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Tobin C. Harding @ 2018-05-03  7:19 UTC (permalink / raw)
  To: Song Liu; +Cc: netdev, kernel-team, qinteng

On Wed, May 02, 2018 at 04:20:30PM -0700, Song Liu wrote:
> This new test captures stackmap with build_id with hardware event
> PERF_COUNT_HW_CPU_CYCLES.
> 
> Because we only support one ips-to-build_id lookup per cpu in NMI
> context, stack_amap will not be able to do the lookup in this test.

           stack_map ?

> Therefore, we didn't do compare_stack_ips(), as it will alwasy fail.
> 
> urandom_read.c is extended to run configurable cycles so that it can be
> caught by the perf event.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/testing/selftests/bpf/test_progs.c   | 137 +++++++++++++++++++++++++++++
>  tools/testing/selftests/bpf/urandom_read.c |  10 ++-
>  2 files changed, 145 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index aa336f0..00bb08c 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
>  	return;
>  }
>  
> +static void test_stacktrace_build_id_nmi(void)
> +{
> +	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> +	const char *file = "./test_stacktrace_build_id.o";
> +	int err, pmu_fd, prog_fd;
> +	struct perf_event_attr attr = {
> +		.sample_freq = 5000,
> +		.freq = 1,
> +		.type = PERF_TYPE_HARDWARE,
> +		.config = PERF_COUNT_HW_CPU_CYCLES,
> +	};
> +	__u32 key, previous_key, val, duration = 0;
> +	struct bpf_object *obj;
> +	char buf[256];
> +	int i, j;
> +	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
> +	int build_id_matches = 0;
> +
> +	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
> +	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
> +		goto out;
  		    
perhaps:
		return;

> +	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
> +			 0 /* cpu 0 */, -1 /* group id */,
> +			 0 /* flags */);
> +	if (CHECK(pmu_fd < 0, "perf_event_open",
> +		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
> +		  pmu_fd, errno))
> +		goto close_prog;
> +
> +	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
> +	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
> +		  err, errno))
> +		goto close_pmu;
> +
> +	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
> +	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
> +		  err, errno))
> +		goto disable_pmu;
> +
> +	/* find map fds */
> +	control_map_fd = bpf_find_map(__func__, obj, "control_map");
> +	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
> +	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
> +	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
> +		  err, errno))
> +		goto disable_pmu;
> +
> +	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
> +	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
> +	       == 0);
> +	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
> +	/* disable stack trace collection */
> +	key = 0;
> +	val = 1;
> +	bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> +	/* for every element in stackid_hmap, we can find a corresponding one
> +	 * in stackmap, and vise versa.
> +	 */
> +	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> +	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> +	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	err = extract_build_id(buf, 256);
> +
> +	if (CHECK(err, "get build_id with readelf",
> +		  "err %d errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
> +	if (CHECK(err, "get_next_key from stackmap",
> +		  "err %d, errno %d\n", err, errno))
> +		goto disable_pmu;
> +
> +	do {
> +		char build_id[64];
> +
> +		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
> +		if (CHECK(err, "lookup_elem from stackmap",
> +			  "err %d, errno %d\n", err, errno))
> +			goto disable_pmu;
> +		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
> +			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
> +			    id_offs[i].offset != 0) {
> +				for (j = 0; j < 20; ++j)
> +					sprintf(build_id + 2 * j, "%02x",
> +						id_offs[i].build_id[j] & 0xff);
> +				if (strstr(buf, build_id) != NULL)
> +					build_id_matches = 1;
> +			}
> +		previous_key = key;
> +	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
> +
> +	if (CHECK(build_id_matches < 1, "build id match",
> +		  "Didn't find expected build ID from the map\n"))
> +		goto disable_pmu;
> +
> +	/*
> +	 * We intentionally skip compare_stack_ips(). This is because we
> +	 * only support one in_nmi() ips-to-build_id translation per cpu
> +	 * at any time, thus stack_amap here will always fallback to
> +	 * BPF_STACK_BUILD_ID_IP;
> +	 */
> +
> +disable_pmu:
> +	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
> +
> +close_pmu:
> +	close(pmu_fd);
> +
> +close_prog:
> +	bpf_object__close(obj);
> +
> +out:
> +	return;
> +}

No real need for label 'out' right?  We can just return directly and
remove the last three lines of this function.

Hope this helps,
Tobin.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
  2018-05-03  7:19   ` Tobin C. Harding
@ 2018-05-04  6:41     ` Song Liu
  0 siblings, 0 replies; 6+ messages in thread
From: Song Liu @ 2018-05-04  6:41 UTC (permalink / raw)
  To: Tobin C. Harding; +Cc: netdev, Kernel Team, Teng Qin

Thanks Tobin. I will fold these changes in. 

> On May 3, 2018, at 12:19 AM, Tobin C. Harding <tobin@apporbit.com> wrote:
> 
> On Wed, May 02, 2018 at 04:20:30PM -0700, Song Liu wrote:
>> This new test captures stackmap with build_id with hardware event
>> PERF_COUNT_HW_CPU_CYCLES.
>> 
>> Because we only support one ips-to-build_id lookup per cpu in NMI
>> context, stack_amap will not be able to do the lookup in this test.
> 
>         stack_map ?

This one is stack_amap. There are two maps in the test. 

Song

> 
>> Therefore, we didn't do compare_stack_ips(), as it will alwasy fail.
>> 
>> urandom_read.c is extended to run configurable cycles so that it can be
>> caught by the perf event.
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> tools/testing/selftests/bpf/test_progs.c   | 137 +++++++++++++++++++++++++++++
>> tools/testing/selftests/bpf/urandom_read.c |  10 ++-
>> 2 files changed, 145 insertions(+), 2 deletions(-)
>> 
>> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
>> index aa336f0..00bb08c 100644
>> --- a/tools/testing/selftests/bpf/test_progs.c
>> +++ b/tools/testing/selftests/bpf/test_progs.c
>> @@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
>> 	return;
>> }
>> 
>> +static void test_stacktrace_build_id_nmi(void)
>> +{
>> +	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
>> +	const char *file = "./test_stacktrace_build_id.o";
>> +	int err, pmu_fd, prog_fd;
>> +	struct perf_event_attr attr = {
>> +		.sample_freq = 5000,
>> +		.freq = 1,
>> +		.type = PERF_TYPE_HARDWARE,
>> +		.config = PERF_COUNT_HW_CPU_CYCLES,
>> +	};
>> +	__u32 key, previous_key, val, duration = 0;
>> +	struct bpf_object *obj;
>> +	char buf[256];
>> +	int i, j;
>> +	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
>> +	int build_id_matches = 0;
>> +
>> +	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
>> +	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
>> +		goto out;
> 		    
> perhaps:
> 		return;
> 
>> +	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
>> +			 0 /* cpu 0 */, -1 /* group id */,
>> +			 0 /* flags */);
>> +	if (CHECK(pmu_fd < 0, "perf_event_open",
>> +		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
>> +		  pmu_fd, errno))
>> +		goto close_prog;
>> +
>> +	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
>> +	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
>> +		  err, errno))
>> +		goto close_pmu;
>> +
>> +	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
>> +	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
>> +		  err, errno))
>> +		goto disable_pmu;
>> +
>> +	/* find map fds */
>> +	control_map_fd = bpf_find_map(__func__, obj, "control_map");
>> +	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
>> +	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
>> +	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
>> +		  err, errno))
>> +		goto disable_pmu;
>> +
>> +	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
>> +	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
>> +	       == 0);
>> +	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
>> +	/* disable stack trace collection */
>> +	key = 0;
>> +	val = 1;
>> +	bpf_map_update_elem(control_map_fd, &key, &val, 0);
>> +
>> +	/* for every element in stackid_hmap, we can find a corresponding one
>> +	 * in stackmap, and vise versa.
>> +	 */
>> +	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
>> +	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
>> +	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	err = extract_build_id(buf, 256);
>> +
>> +	if (CHECK(err, "get build_id with readelf",
>> +		  "err %d errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
>> +	if (CHECK(err, "get_next_key from stackmap",
>> +		  "err %d, errno %d\n", err, errno))
>> +		goto disable_pmu;
>> +
>> +	do {
>> +		char build_id[64];
>> +
>> +		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
>> +		if (CHECK(err, "lookup_elem from stackmap",
>> +			  "err %d, errno %d\n", err, errno))
>> +			goto disable_pmu;
>> +		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
>> +			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
>> +			    id_offs[i].offset != 0) {
>> +				for (j = 0; j < 20; ++j)
>> +					sprintf(build_id + 2 * j, "%02x",
>> +						id_offs[i].build_id[j] & 0xff);
>> +				if (strstr(buf, build_id) != NULL)
>> +					build_id_matches = 1;
>> +			}
>> +		previous_key = key;
>> +	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
>> +
>> +	if (CHECK(build_id_matches < 1, "build id match",
>> +		  "Didn't find expected build ID from the map\n"))
>> +		goto disable_pmu;
>> +
>> +	/*
>> +	 * We intentionally skip compare_stack_ips(). This is because we
>> +	 * only support one in_nmi() ips-to-build_id translation per cpu
>> +	 * at any time, thus stack_amap here will always fallback to
>> +	 * BPF_STACK_BUILD_ID_IP;
>> +	 */
>> +
>> +disable_pmu:
>> +	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
>> +
>> +close_pmu:
>> +	close(pmu_fd);
>> +
>> +close_prog:
>> +	bpf_object__close(obj);
>> +
>> +out:
>> +	return;
>> +}
> 
> No real need for label 'out' right?  We can just return directly and
> remove the last three lines of this function.
> 
> Hope this helps,
> Tobin.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-05-04  6:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
2018-05-03  7:03   ` Tobin C. Harding
2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
2018-05-03  7:19   ` Tobin C. Harding
2018-05-04  6:41     ` Song Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.