* [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0
@ 2022-03-10  8:22 Namhyung Kim
  2022-03-10  8:22 ` [PATCH 2/2] bpf/selftests: Test skipping stacktrace Namhyung Kim
  2022-03-10 22:54 ` [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Yonghong Song
  0 siblings, 2 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-10  8:22 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, netdev, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

Let's say that the caller has storage for num_elem stack frames.  Then,
the BPF stack helper functions walk the stack for only num_elem frames.
This means that if skip > 0, one keeps only 'num_elem - skip' frames.
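
For illustration only (this sketch is not part of the patch; the
program type and names are hypothetical), a caller that hits the
problem looks roughly like this:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/sched/sched_switch")
int handle_switch(void *ctx)
{
	__u64 buf[4];	/* storage for num_elem = 4 frames */
	long ret;

	/* ask for the stack, skipping the first 2 frames */
	ret = bpf_get_stack(ctx, buf, sizeof(buf), 2);

	/* before this patch: at most num_elem - skip = 2 frames are
	 * copied (ret == 16); after this patch: up to num_elem = 4
	 * frames can be copied (ret == 32) when the stack is deep
	 * enough
	 */
	return 0;
}

char _license[] SEC("license") = "GPL";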

This is because init_nr in the perf_callchain_entry is set to the end
of the buffer so that only num_elem entries are saved.  I believe this
was done because the perf callchain code used to unwind the stack
frames until it reached the global max size (sysctl_perf_event_max_stack).
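
As a concrete illustration (the numbers are only an example, assuming
the default sysctl_perf_event_max_stack of 127 and a map value that
holds num_elem = 4 entries):

	init_nr        = 127 - 4 = 123
	entries filled : ip[123] .. ip[126]  (only 4 frames unwound in total)
	after skip = 2 : ip[125] .. ip[126]  (2 frames left for the caller)

With max_stack limited to num_elem + skip instead, up to 6 frames are
unwound and 4 remain after skipping.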

However, it now has perf_callchain_entry_ctx.max_stack to limit the
iteration locally.  This allows the init_nr handling to be removed from
the BPF callstack code and clears up the confusion with the perf_event's
__PERF_SAMPLE_CALLCHAIN_EARLY, which sets init_nr to 0.

Also change the comment on bpf_get_stack() in the header file to be
more explicit about what the return value means.

Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/uapi/linux/bpf.h |  4 +--
 kernel/bpf/stackmap.c    | 56 +++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..77f4a022c60c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2975,8 +2975,8 @@ union bpf_attr {
  *
  * 			# sysctl kernel.perf_event_max_stack=<new value>
  * 	Return
- * 		A non-negative value equal to or less than *size* on success,
- * 		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
  *
  * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 22c8ae94e4c1..2823dcefae10 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -166,7 +166,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 }
 
 static struct perf_callchain_entry *
-get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
 {
 #ifdef CONFIG_STACKTRACE
 	struct perf_callchain_entry *entry;
@@ -177,9 +177,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
 	if (!entry)
 		return NULL;
 
-	entry->nr = init_nr +
-		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
-				     sysctl_perf_event_max_stack - init_nr, 0);
+	entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
+					 max_depth, 0);
 
 	/* stack_trace_save_tsk() works on unsigned long array, while
 	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
@@ -191,7 +190,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
 		int i;
 
 		/* copy data from the end to avoid using extra buffer */
-		for (i = entry->nr - 1; i >= (int)init_nr; i--)
+		for (i = entry->nr - 1; i >= 0; i--)
 			to[i] = (u64)(from[i]);
 	}
 
@@ -208,27 +207,19 @@ static long __bpf_get_stackid(struct bpf_map *map,
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
-	u32 max_depth = map->value_size / stack_map_data_size(map);
-	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
-	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	u32 hash, id, trace_nr, trace_len;
 	bool user = flags & BPF_F_USER_STACK;
 	u64 *ips;
 	bool hash_matches;
 
-	/* get_perf_callchain() guarantees that trace->nr >= init_nr
-	 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
-	 */
-	trace_nr = trace->nr - init_nr;
-
-	if (trace_nr <= skip)
+	if (trace->nr <= skip)
 		/* skipping more than usable stack trace */
 		return -EFAULT;
 
-	trace_nr -= skip;
+	trace_nr = trace->nr - skip;
 	trace_len = trace_nr * sizeof(u64);
-	ips = trace->ip + skip + init_nr;
+	ips = trace->ip + skip;
 	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
 	id = hash & (smap->n_buckets - 1);
 	bucket = READ_ONCE(smap->buckets[id]);
@@ -285,8 +276,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
 	u32 max_depth = map->value_size / stack_map_data_size(map);
-	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
-	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	bool user = flags & BPF_F_USER_STACK;
 	struct perf_callchain_entry *trace;
 	bool kernel = !user;
@@ -295,8 +285,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
 		return -EINVAL;
 
-	trace = get_perf_callchain(regs, init_nr, kernel, user,
-				   sysctl_perf_event_max_stack, false, false);
+	max_depth += skip;
+	if (max_depth > sysctl_perf_event_max_stack)
+		max_depth = sysctl_perf_event_max_stack;
+
+	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
+				   false, false);
 
 	if (unlikely(!trace))
 		/* couldn't fetch the stack trace */
@@ -387,7 +381,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 			    struct perf_callchain_entry *trace_in,
 			    void *buf, u32 size, u64 flags)
 {
-	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	bool user = flags & BPF_F_USER_STACK;
@@ -412,30 +406,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		goto err_fault;
 
 	num_elem = size / elem_size;
-	if (sysctl_perf_event_max_stack < num_elem)
-		init_nr = 0;
-	else
-		init_nr = sysctl_perf_event_max_stack - num_elem;
+	max_depth = num_elem + skip;
+	if (sysctl_perf_event_max_stack < max_depth)
+		max_depth = sysctl_perf_event_max_stack;
 
 	if (trace_in)
 		trace = trace_in;
 	else if (kernel && task)
-		trace = get_callchain_entry_for_task(task, init_nr);
+		trace = get_callchain_entry_for_task(task, max_depth);
 	else
-		trace = get_perf_callchain(regs, init_nr, kernel, user,
-					   sysctl_perf_event_max_stack,
+		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
 					   false, false);
 	if (unlikely(!trace))
 		goto err_fault;
 
-	trace_nr = trace->nr - init_nr;
-	if (trace_nr < skip)
+	if (trace->nr < skip)
 		goto err_fault;
 
-	trace_nr -= skip;
+	trace_nr = trace->nr - skip;
 	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
 	copy_len = trace_nr * elem_size;
-	ips = trace->ip + skip + init_nr;
+
+	ips = trace->ip + skip;
 	if (user && user_build_id)
 		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
 	else
-- 
2.35.1.723.g4982287a31-goog



* [PATCH 2/2] bpf/selftests: Test skipping stacktrace
  2022-03-10  8:22 [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Namhyung Kim
@ 2022-03-10  8:22 ` Namhyung Kim
  2022-03-10 23:21   ` Yonghong Song
  2022-03-11 22:23   ` Andrii Nakryiko
  2022-03-10 22:54 ` [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Yonghong Song
  1 sibling, 2 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-10  8:22 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, netdev, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

Add a test case for stacktrace with skip > 0 using a small-sized
buffer.  Previously, skipping a number of entries greater than or
equal to the buffer size was not supported, and the skipped part was
filled with 0.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 .../bpf/prog_tests/stacktrace_map_skip.c      | 72 ++++++++++++++++
 .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
 create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
new file mode 100644
index 000000000000..bcb244aa3c78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_map_skip.skel.h"
+
+#define TEST_STACK_DEPTH  2
+
+void test_stacktrace_map_skip(void)
+{
+	struct stacktrace_map_skip *skel;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	int err, stack_trace_len;
+	__u32 key, val, duration = 0;
+
+	skel = stacktrace_map_skip__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
+		return;
+
+	/* find map fds */
+	control_map_fd = bpf_map__fd(skel->maps.control_map);
+	if (CHECK_FAIL(control_map_fd < 0))
+		goto out;
+
+	stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+	if (CHECK_FAIL(stackid_hmap_fd < 0))
+		goto out;
+
+	stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+	if (CHECK_FAIL(stackmap_fd < 0))
+		goto out;
+
+	stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+	if (CHECK_FAIL(stack_amap_fd < 0))
+		goto out;
+
+	err = stacktrace_map_skip__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
+		goto out;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto out;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto out;
+
+	stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto out;
+
+	if (CHECK(skel->bss->failed, "check skip",
+		  "failed to skip some depth: %d", skel->bss->failed))
+		goto out;
+
+out:
+	stacktrace_map_skip__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
new file mode 100644
index 000000000000..323248b17ae4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define TEST_STACK_DEPTH         2
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} control_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
+
+typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stack_amap SEC(".maps");
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+	unsigned long long pad;
+	char prev_comm[TASK_COMM_LEN];
+	int prev_pid;
+	int prev_prio;
+	long long prev_state;
+	char next_comm[TASK_COMM_LEN];
+	int next_pid;
+	int next_prio;
+};
+
+int failed = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	__u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
+	__u32 key = 0, val = 0, *value_p;
+	__u64 *stack_p;
+
+	value_p = bpf_map_lookup_elem(&control_map, &key);
+	if (value_p && *value_p)
+		return 0; /* skip if non-zero *value_p */
+
+	/* it should allow skipping whole buffer size entries */
+	key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
+	if ((int)key >= 0) {
+		/* The size of stackmap and stack_amap should be the same */
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p) {
+			bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
+			/* it wrongly skipped all the entries and filled zero */
+			if (stack_p[0] == 0)
+				failed = 1;
+		}
+	} else if ((int)key == -14/*EFAULT*/) {
+		/* old kernel doesn't support skipping that many entries */
+		failed = 2;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.35.1.723.g4982287a31-goog



* Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0
  2022-03-10  8:22 [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Namhyung Kim
  2022-03-10  8:22 ` [PATCH 2/2] bpf/selftests: Test skipping stacktrace Namhyung Kim
@ 2022-03-10 22:54 ` Yonghong Song
  2022-03-11  0:23   ` Hao Luo
  2022-03-11  0:32   ` Namhyung Kim
  1 sibling, 2 replies; 10+ messages in thread
From: Yonghong Song @ 2022-03-10 22:54 UTC (permalink / raw)
  To: Namhyung Kim, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Martin KaFai Lau, Song Liu, John Fastabend, KP Singh, netdev,
	bpf, LKML, Arnaldo Carvalho de Melo, Peter Zijlstra, Eugene Loh,
	Hao Luo



On 3/10/22 12:22 AM, Namhyung Kim wrote:
> Let's say that the caller has storage for num_elem stack frames.  Then,
> the BPF stack helper functions walk the stack for only num_elem frames.
> This means that if skip > 0, one keeps only 'num_elem - skip' frames.
> 
> This is because it sets init_nr in the perf_callchain_entry to the end
> of the buffer to save num_elem entries only.  I believe it was because
> the perf callchain code unwound the stack frames until it reached the
> global max size (sysctl_perf_event_max_stack).
> 
> However it now has perf_callchain_entry_ctx.max_stack to limit the
> iteration locally.  This simplifies the code to handle init_nr in the
> BPF callstack entries and removes the confusion with the perf_event's
> __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
> 
> Also change the comment on bpf_get_stack() in the header file to be
> more explicit what the return value means.
> 
> Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>

The change looks good to me. This patch actually fixed a bug
discussed below:

 
https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com/

A reference to the above link in the commit message would be useful,
as it helps people understand the issue better with an example.

Also, the following fixes tag should be added:

Fixes: c195651e565a ("bpf: add bpf_get_stack helper")

Since the bug needs skip > 0, which is seldom used, and the currently
returned stack is still correct although with fewer entries, I guess
that is why fewer people complain.

Anyway, ack the patch:
Acked-by: Yonghong Song <yhs@fb.com>


> ---
>   include/uapi/linux/bpf.h |  4 +--
>   kernel/bpf/stackmap.c    | 56 +++++++++++++++++-----------------------
>   2 files changed, 26 insertions(+), 34 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b0383d371b9a..77f4a022c60c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2975,8 +2975,8 @@ union bpf_attr {
>    *
>    * 			# sysctl kernel.perf_event_max_stack=<new value>
>    * 	Return
> - * 		A non-negative value equal to or less than *size* on success,
> - * 		or a negative error in case of failure.
> + * 		The non-negative copied *buf* length equal to or less than
> + * 		*size* on success, or a negative error in case of failure.
>    *
>    * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
[...]


* Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace
  2022-03-10  8:22 ` [PATCH 2/2] bpf/selftests: Test skipping stacktrace Namhyung Kim
@ 2022-03-10 23:21   ` Yonghong Song
  2022-03-11  0:40     ` Namhyung Kim
  2022-03-11 22:23   ` Andrii Nakryiko
  1 sibling, 1 reply; 10+ messages in thread
From: Yonghong Song @ 2022-03-10 23:21 UTC (permalink / raw)
  To: Namhyung Kim, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Martin KaFai Lau, Song Liu, John Fastabend, KP Singh, netdev,
	bpf, LKML, Arnaldo Carvalho de Melo, Peter Zijlstra, Eugene Loh,
	Hao Luo



On 3/10/22 12:22 AM, Namhyung Kim wrote:
> Add a test case for stacktrace with skip > 0 using a small sized
> buffer.  It didn't support skipping entries greater than or equal to
> the size of buffer and filled the skipped part with 0.
> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>   .../bpf/prog_tests/stacktrace_map_skip.c      | 72 ++++++++++++++++
>   .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
>   2 files changed, 154 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
>   create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..bcb244aa3c78
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "stacktrace_map_skip.skel.h"
> +
> +#define TEST_STACK_DEPTH  2
> +
> +void test_stacktrace_map_skip(void)
> +{
> +	struct stacktrace_map_skip *skel;
> +	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> +	int err, stack_trace_len;
> +	__u32 key, val, duration = 0;
> +
> +	skel = stacktrace_map_skip__open_and_load();
> +	if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> +		return;

Please use ASSERT_* macros instead of CHECK* macros.
You can see other prog_tests/*.c files for examples.
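
For instance, a sketch of the conversion (the exact assert choices and
test names here are only suggestions):

	skel = stacktrace_map_skip__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	control_map_fd = bpf_map__fd(skel->maps.control_map);
	if (!ASSERT_GE(control_map_fd, 0, "control_map_fd"))
		goto out;

	err = stacktrace_map_skip__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
	if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
		goto out;

	ASSERT_EQ(skel->bss->failed, 0, "check_skip");
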

> +
> +	/* find map fds */
> +	control_map_fd = bpf_map__fd(skel->maps.control_map);
> +	if (CHECK_FAIL(control_map_fd < 0))
> +		goto out;
> +
> +	stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> +	if (CHECK_FAIL(stackid_hmap_fd < 0))
> +		goto out;
> +
> +	stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> +	if (CHECK_FAIL(stackmap_fd < 0))
> +		goto out;
> +
> +	stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> +	if (CHECK_FAIL(stack_amap_fd < 0))
> +		goto out;
> +
> +	err = stacktrace_map_skip__attach(skel);
> +	if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> +		goto out;
> +
> +	/* give some time for bpf program run */
> +	sleep(1);
> +
> +	/* disable stack trace collection */
> +	key = 0;
> +	val = 1;
> +	bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> +	/* for every element in stackid_hmap, we can find a corresponding one
> +	 * in stackmap, and vise versa.
> +	 */
> +	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> +	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> +		  "err %d errno %d\n", err, errno))
> +		goto out;
> +
> +	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> +	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> +		  "err %d errno %d\n", err, errno))
> +		goto out;
> +
> +	stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> +	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> +	if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> +		  "err %d errno %d\n", err, errno))
> +		goto out;
> +
> +	if (CHECK(skel->bss->failed, "check skip",
> +		  "failed to skip some depth: %d", skel->bss->failed))
> +		goto out;
> +
> +out:
> +	stacktrace_map_skip__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..323248b17ae4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define TEST_STACK_DEPTH         2
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_ARRAY);
> +	__uint(max_entries, 1);
> +	__type(key, __u32);
> +	__type(value, __u32);
> +} control_map SEC(".maps");

You can use a global variable for this.
The global variable can be assigned a value (if needed, e.g., non-zero)
before skeleton open and load.
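
Something like this (just a sketch; the variable name is arbitrary and
it relies on the skeleton's mmap-ed global data):

In the BPF program:

	int control = 0;

	...
	if (control)
		return 0;	/* skip if control is set */

In the prog_test, instead of bpf_map_update_elem():

	/* disable stack trace collection */
	skel->bss->control = 1;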

> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_HASH);
> +	__uint(max_entries, 16384);
> +	__type(key, __u32);
> +	__type(value, __u32);
> +} stackid_hmap SEC(".maps");
> +
> +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
> +	__uint(max_entries, 16384);
> +	__type(key, __u32);
> +	__type(value, stack_trace_t);
> +} stackmap SEC(".maps");
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_ARRAY);
> +	__uint(max_entries, 16384);
> +	__type(key, __u32);
> +	__type(value, stack_trace_t);
> +} stack_amap SEC(".maps");
> +
> +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> +struct sched_switch_args {
> +	unsigned long long pad;
> +	char prev_comm[TASK_COMM_LEN];
> +	int prev_pid;
> +	int prev_prio;
> +	long long prev_state;
> +	char next_comm[TASK_COMM_LEN];
> +	int next_pid;
> +	int next_prio;
> +};

You can use this structure in vmlinux.h instead of the above:
struct trace_event_raw_sched_switch {
         struct trace_entry ent;
         char prev_comm[16];
         pid_t prev_pid;
         int prev_prio;
         long int prev_state;
         char next_comm[16];
         pid_t next_pid;
         int next_prio;
         char __data[0];
};
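
e.g. the handler signature would then become (sketch):

	SEC("tracepoint/sched/sched_switch")
	int oncpu(struct trace_event_raw_sched_switch *ctx)
	{
		...
	}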


> +
> +int failed = 0;
> +
> +SEC("tracepoint/sched/sched_switch")
> +int oncpu(struct sched_switch_args *ctx)
> +{
> +	__u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> +	__u32 key = 0, val = 0, *value_p;
> +	__u64 *stack_p;
> +
> +	value_p = bpf_map_lookup_elem(&control_map, &key);
> +	if (value_p && *value_p)
> +		return 0; /* skip if non-zero *value_p */
> +
> +	/* it should allow skipping whole buffer size entries */
> +	key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> +	if ((int)key >= 0) {
> +		/* The size of stackmap and stack_amap should be the same */
> +		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> +		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> +		if (stack_p) {
> +			bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> +			/* it wrongly skipped all the entries and filled zero */
> +			if (stack_p[0] == 0)
> +				failed = 1;
> +		}
> +	} else if ((int)key == -14/*EFAULT*/) {
> +		/* old kernel doesn't support skipping that many entries */
> +		failed = 2;

The selftest is supposed to run with the kernel in the same code base.
So it is okay to skip the above 'if' test and just set failed = 2.

> +	}
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";


* Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0
  2022-03-10 22:54 ` [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Yonghong Song
@ 2022-03-11  0:23   ` Hao Luo
  2022-03-11  0:33     ` Namhyung Kim
  2022-03-11  0:32   ` Namhyung Kim
  1 sibling, 1 reply; 10+ messages in thread
From: Hao Luo @ 2022-03-11  0:23 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Namhyung Kim, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Song Liu, John Fastabend,
	KP Singh, netdev, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh

On Thu, Mar 10, 2022 at 2:54 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Let's say that the caller has storage for num_elem stack frames.  Then,
> > the BPF stack helper functions walk the stack for only num_elem frames.
> > This means that if skip > 0, one keeps only 'num_elem - skip' frames.
> >
> > This is because it sets init_nr in the perf_callchain_entry to the end
> > of the buffer to save num_elem entries only.  I believe it was because
> > the perf callchain code unwound the stack frames until it reached the
> > global max size (sysctl_perf_event_max_stack).
> >
> > However it now has perf_callchain_entry_ctx.max_stack to limit the
> > iteration locally.  This simplifies the code to handle init_nr in the
> > BPF callstack entries and removes the confusion with the perf_event's
> > __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
> >
> > Also change the comment on bpf_get_stack() in the header file to be
> > more explicit what the return value means.
> >
> > Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
>
> The change looks good to me. This patch actually fixed a bug
> discussed below:
>
>
> https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com/
>
> A reference to the above link in the commit message
> will be useful for people to understand better with an
> example.
>
> Also, the following fixes tag should be added:
>
> Fixes: c195651e565a ("bpf: add bpf_get_stack helper")
>
> Since the bug needs skip > 0 which is seldomly used,
> and the current returned stack is still correct although
> with less entries, I guess that is why less people
> complains.
>
> Anyway, ack the patch:
> Acked-by: Yonghong Song <yhs@fb.com>
>
>
> > ---
> >   include/uapi/linux/bpf.h |  4 +--
> >   kernel/bpf/stackmap.c    | 56 +++++++++++++++++-----------------------
> >   2 files changed, 26 insertions(+), 34 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index b0383d371b9a..77f4a022c60c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2975,8 +2975,8 @@ union bpf_attr {
> >    *
> >    *                  # sysctl kernel.perf_event_max_stack=<new value>
> >    *  Return
> > - *           A non-negative value equal to or less than *size* on success,
> > - *           or a negative error in case of failure.
> > + *           The non-negative copied *buf* length equal to or less than
> > + *           *size* on success, or a negative error in case of failure.
> >    *
> >    * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)

Namhyung, I think you also need to mirror the change in
tools/include/uapi/linux/bpf.h


* Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0
  2022-03-10 22:54 ` [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0 Yonghong Song
  2022-03-11  0:23   ` Hao Luo
@ 2022-03-11  0:32   ` Namhyung Kim
  1 sibling, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-11  0:32 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, John Fastabend, KP Singh,
	Network Development, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

Hello,

On Thu, Mar 10, 2022 at 2:55 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Let's say that the caller has storage for num_elem stack frames.  Then,
> > the BPF stack helper functions walk the stack for only num_elem frames.
> > This means that if skip > 0, one keeps only 'num_elem - skip' frames.
> >
> > This is because it sets init_nr in the perf_callchain_entry to the end
> > of the buffer to save num_elem entries only.  I believe it was because
> > the perf callchain code unwound the stack frames until it reached the
> > global max size (sysctl_perf_event_max_stack).
> >
> > However it now has perf_callchain_entry_ctx.max_stack to limit the
> > iteration locally.  This simplifies the code to handle init_nr in the
> > BPF callstack entries and removes the confusion with the perf_event's
> > __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0.
> >
> > Also change the comment on bpf_get_stack() in the header file to be
> > more explicit what the return value means.
> >
> > Based-on-patch-by: Eugene Loh <eugene.loh@oracle.com>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
>
> The change looks good to me. This patch actually fixed a bug
> discussed below:
>
>
> https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com/
>
> A reference to the above link in the commit message
> will be useful for people to understand better with an
> example.

Ok, will do.

>
> Also, the following fixes tag should be added:
>
> Fixes: c195651e565a ("bpf: add bpf_get_stack helper")

Sure.

>
> Since the bug needs skip > 0 which is seldomly used,
> and the current returned stack is still correct although
> with less entries, I guess that is why less people
> complains.
>
> Anyway, ack the patch:
> Acked-by: Yonghong Song <yhs@fb.com>

Thanks for your review!

Namhyung


>
>
> > ---
> >   include/uapi/linux/bpf.h |  4 +--
> >   kernel/bpf/stackmap.c    | 56 +++++++++++++++++-----------------------
> >   2 files changed, 26 insertions(+), 34 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index b0383d371b9a..77f4a022c60c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2975,8 +2975,8 @@ union bpf_attr {
> >    *
> >    *                  # sysctl kernel.perf_event_max_stack=<new value>
> >    *  Return
> > - *           A non-negative value equal to or less than *size* on success,
> > - *           or a negative error in case of failure.
> > + *           The non-negative copied *buf* length equal to or less than
> > + *           *size* on success, or a negative error in case of failure.
> >    *
> >    * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
> [...]


* Re: [PATCH 1/2] bpf: Adjust BPF stack helper functions to accommodate skip > 0
  2022-03-11  0:23   ` Hao Luo
@ 2022-03-11  0:33     ` Namhyung Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-11  0:33 UTC (permalink / raw)
  To: Hao Luo
  Cc: Yonghong Song, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Song Liu, John Fastabend,
	KP Singh, Network Development, bpf, LKML,
	Arnaldo Carvalho de Melo, Peter Zijlstra, Eugene Loh

Hi Hao,

On Thu, Mar 10, 2022 at 4:24 PM Hao Luo <haoluo@google.com> wrote:
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index b0383d371b9a..77f4a022c60c 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -2975,8 +2975,8 @@ union bpf_attr {
> > >    *
> > >    *                  # sysctl kernel.perf_event_max_stack=<new value>
> > >    *  Return
> > > - *           A non-negative value equal to or less than *size* on success,
> > > - *           or a negative error in case of failure.
> > > + *           The non-negative copied *buf* length equal to or less than
> > > + *           *size* on success, or a negative error in case of failure.
> > >    *
> > >    * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
>
> Namhyung, I think you also need to mirror the change in
> tools/include/uapi/linux/bpf.h

Oh, right.  Will update.

Thanks,
Namhyung


* Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace
  2022-03-10 23:21   ` Yonghong Song
@ 2022-03-11  0:40     ` Namhyung Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-11  0:40 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, John Fastabend, KP Singh,
	Network Development, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

On Thu, Mar 10, 2022 at 3:22 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 3/10/22 12:22 AM, Namhyung Kim wrote:
> > Add a test case for stacktrace with skip > 0 using a small sized
> > buffer.  It didn't support skipping entries greater than or equal to
> > the size of buffer and filled the skipped part with 0.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >   .../bpf/prog_tests/stacktrace_map_skip.c      | 72 ++++++++++++++++
> >   .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
> >   2 files changed, 154 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> >   create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> > new file mode 100644
> > index 000000000000..bcb244aa3c78
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> > @@ -0,0 +1,72 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <test_progs.h>
> > +#include "stacktrace_map_skip.skel.h"
> > +
> > +#define TEST_STACK_DEPTH  2
> > +
> > +void test_stacktrace_map_skip(void)
> > +{
> > +     struct stacktrace_map_skip *skel;
> > +     int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> > +     int err, stack_trace_len;
> > +     __u32 key, val, duration = 0;
> > +
> > +     skel = stacktrace_map_skip__open_and_load();
> > +     if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> > +             return;
>
> Please use ASSERT_* macros instead of CHECK* macros.
> You can see other prog_tests/*.c files for examples.

I'll take a look and make the changes.

>
> > +
> > +     /* find map fds */
> > +     control_map_fd = bpf_map__fd(skel->maps.control_map);
> > +     if (CHECK_FAIL(control_map_fd < 0))
> > +             goto out;
> > +
> > +     stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> > +     if (CHECK_FAIL(stackid_hmap_fd < 0))
> > +             goto out;
> > +
> > +     stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> > +     if (CHECK_FAIL(stackmap_fd < 0))
> > +             goto out;
> > +
> > +     stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> > +     if (CHECK_FAIL(stack_amap_fd < 0))
> > +             goto out;
> > +
> > +     err = stacktrace_map_skip__attach(skel);
> > +     if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> > +             goto out;
> > +
> > +     /* give some time for bpf program run */
> > +     sleep(1);
> > +
> > +     /* disable stack trace collection */
> > +     key = 0;
> > +     val = 1;
> > +     bpf_map_update_elem(control_map_fd, &key, &val, 0);
> > +
> > +     /* for every element in stackid_hmap, we can find a corresponding one
> > +      * in stackmap, and vise versa.
> > +      */
> > +     err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> > +     if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> > +               "err %d errno %d\n", err, errno))
> > +             goto out;
> > +
> > +     err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> > +     if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> > +               "err %d errno %d\n", err, errno))
> > +             goto out;
> > +
> > +     stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> > +     err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> > +     if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> > +               "err %d errno %d\n", err, errno))
> > +             goto out;
> > +
> > +     if (CHECK(skel->bss->failed, "check skip",
> > +               "failed to skip some depth: %d", skel->bss->failed))
> > +             goto out;
> > +
> > +out:
> > +     stacktrace_map_skip__destroy(skel);
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> > new file mode 100644
> > index 000000000000..323248b17ae4
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> > @@ -0,0 +1,82 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <vmlinux.h>
> > +#include <bpf/bpf_helpers.h>
> > +
> > +#define TEST_STACK_DEPTH         2
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_ARRAY);
> > +     __uint(max_entries, 1);
> > +     __type(key, __u32);
> > +     __type(value, __u32);
> > +} control_map SEC(".maps");
>
> You can use a global variable for this.
> The global variable can be assigned a value (if needed, e.g., non-zero)
> before skeleton open and load.

Right, will change.

>
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_HASH);
> > +     __uint(max_entries, 16384);
> > +     __type(key, __u32);
> > +     __type(value, __u32);
> > +} stackid_hmap SEC(".maps");
> > +
> > +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> > +     __uint(max_entries, 16384);
> > +     __type(key, __u32);
> > +     __type(value, stack_trace_t);
> > +} stackmap SEC(".maps");
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_ARRAY);
> > +     __uint(max_entries, 16384);
> > +     __type(key, __u32);
> > +     __type(value, stack_trace_t);
> > +} stack_amap SEC(".maps");
> > +
> > +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> > +struct sched_switch_args {
> > +     unsigned long long pad;
> > +     char prev_comm[TASK_COMM_LEN];
> > +     int prev_pid;
> > +     int prev_prio;
> > +     long long prev_state;
> > +     char next_comm[TASK_COMM_LEN];
> > +     int next_pid;
> > +     int next_prio;
> > +};
>
> You can use this structure in vmlinux.h instead of the above:
> struct trace_event_raw_sched_switch {
>          struct trace_entry ent;
>          char prev_comm[16];
>          pid_t prev_pid;
>          int prev_prio;
>          long int prev_state;
>          char next_comm[16];
>          pid_t next_pid;
>          int next_prio;
>          char __data[0];
> };

Looks good, will change.

>
> > +
> > +int failed = 0;
> > +
> > +SEC("tracepoint/sched/sched_switch")
> > +int oncpu(struct sched_switch_args *ctx)
> > +{
> > +     __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> > +     __u32 key = 0, val = 0, *value_p;
> > +     __u64 *stack_p;
> > +
> > +     value_p = bpf_map_lookup_elem(&control_map, &key);
> > +     if (value_p && *value_p)
> > +             return 0; /* skip if non-zero *value_p */
> > +
> > +     /* it should allow skipping whole buffer size entries */
> > +     key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> > +     if ((int)key >= 0) {
> > +             /* The size of stackmap and stack_amap should be the same */
> > +             bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> > +             stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> > +             if (stack_p) {
> > +                     bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> > +                     /* it wrongly skipped all the entries and filled zero */
> > +                     if (stack_p[0] == 0)
> > +                             failed = 1;
> > +             }
> > +     } else if ((int)key == -14/*EFAULT*/) {
> > +             /* old kernel doesn't support skipping that many entries */
> > +             failed = 2;
>
> The selftest is supposed to run with the kernel in the same code base.
> So it is okay to skip the above 'if' test and just set failed = 2.

I see.  I will make the change.

Thanks,
Namhyung

>
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";


* Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace
  2022-03-10  8:22 ` [PATCH 2/2] bpf/selftests: Test skipping stacktrace Namhyung Kim
  2022-03-10 23:21   ` Yonghong Song
@ 2022-03-11 22:23   ` Andrii Nakryiko
  2022-03-14 17:28     ` Namhyung Kim
  1 sibling, 1 reply; 10+ messages in thread
From: Andrii Nakryiko @ 2022-03-11 22:23 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Networking, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

On Thu, Mar 10, 2022 at 12:22 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Add a test case for stacktrace with skip > 0 using a small sized
> buffer.  It didn't support skipping entries greater than or equal to
> the size of buffer and filled the skipped part with 0.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  .../bpf/prog_tests/stacktrace_map_skip.c      | 72 ++++++++++++++++
>  .../selftests/bpf/progs/stacktrace_map_skip.c | 82 +++++++++++++++++++
>  2 files changed, 154 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
>  create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..bcb244aa3c78
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "stacktrace_map_skip.skel.h"
> +
> +#define TEST_STACK_DEPTH  2
> +
> +void test_stacktrace_map_skip(void)
> +{
> +       struct stacktrace_map_skip *skel;
> +       int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> +       int err, stack_trace_len;
> +       __u32 key, val, duration = 0;
> +
> +       skel = stacktrace_map_skip__open_and_load();
> +       if (CHECK(!skel, "skel_open_and_load", "skeleton open failed\n"))
> +               return;
> +
> +       /* find map fds */
> +       control_map_fd = bpf_map__fd(skel->maps.control_map);
> +       if (CHECK_FAIL(control_map_fd < 0))
> +               goto out;
> +
> +       stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
> +       if (CHECK_FAIL(stackid_hmap_fd < 0))
> +               goto out;
> +
> +       stackmap_fd = bpf_map__fd(skel->maps.stackmap);
> +       if (CHECK_FAIL(stackmap_fd < 0))
> +               goto out;
> +
> +       stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
> +       if (CHECK_FAIL(stack_amap_fd < 0))
> +               goto out;
> +
> +       err = stacktrace_map_skip__attach(skel);
> +       if (CHECK(err, "skel_attach", "skeleton attach failed\n"))
> +               goto out;
> +
> +       /* give some time for bpf program run */
> +       sleep(1);
> +
> +       /* disable stack trace collection */
> +       key = 0;
> +       val = 1;
> +       bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> +       /* for every element in stackid_hmap, we can find a corresponding one
> +        * in stackmap, and vise versa.
> +        */
> +       err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> +       if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> +                 "err %d errno %d\n", err, errno))
> +               goto out;
> +
> +       err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> +       if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> +                 "err %d errno %d\n", err, errno))
> +               goto out;
> +
> +       stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
> +       err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
> +       if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
> +                 "err %d errno %d\n", err, errno))
> +               goto out;
> +
> +       if (CHECK(skel->bss->failed, "check skip",
> +                 "failed to skip some depth: %d", skel->bss->failed))
> +               goto out;
> +
> +out:
> +       stacktrace_map_skip__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> new file mode 100644
> index 000000000000..323248b17ae4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define TEST_STACK_DEPTH         2
> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_ARRAY);
> +       __uint(max_entries, 1);
> +       __type(key, __u32);
> +       __type(value, __u32);
> +} control_map SEC(".maps");
> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_HASH);
> +       __uint(max_entries, 16384);
> +       __type(key, __u32);
> +       __type(value, __u32);
> +} stackid_hmap SEC(".maps");
> +
> +typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> +       __uint(max_entries, 16384);
> +       __type(key, __u32);
> +       __type(value, stack_trace_t);
> +} stackmap SEC(".maps");
> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_ARRAY);
> +       __uint(max_entries, 16384);
> +       __type(key, __u32);
> +       __type(value, stack_trace_t);
> +} stack_amap SEC(".maps");
> +
> +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
> +struct sched_switch_args {
> +       unsigned long long pad;
> +       char prev_comm[TASK_COMM_LEN];
> +       int prev_pid;
> +       int prev_prio;
> +       long long prev_state;
> +       char next_comm[TASK_COMM_LEN];
> +       int next_pid;
> +       int next_prio;
> +};
> +
> +int failed = 0;
> +
> +SEC("tracepoint/sched/sched_switch")
> +int oncpu(struct sched_switch_args *ctx)
> +{
> +       __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> +       __u32 key = 0, val = 0, *value_p;
> +       __u64 *stack_p;
> +

please also add filtering by PID to avoid interference from other
selftests when run in parallel mode
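
e.g. something like this (a sketch; the global name and where it is
set are just suggestions):

In the BPF program:

	int pid = 0;	/* set from user space before attach */

	SEC("tracepoint/sched/sched_switch")
	int oncpu(struct sched_switch_args *ctx)
	{
		if ((bpf_get_current_pid_tgid() >> 32) != pid)
			return 0;
		...
	}

In the prog_test, before stacktrace_map_skip__attach():

	skel->bss->pid = getpid();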

> +       value_p = bpf_map_lookup_elem(&control_map, &key);
> +       if (value_p && *value_p)
> +               return 0; /* skip if non-zero *value_p */
> +
> +       /* it should allow skipping whole buffer size entries */
> +       key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> +       if ((int)key >= 0) {
> +               /* The size of stackmap and stack_amap should be the same */
> +               bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> +               stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> +               if (stack_p) {
> +                       bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> +                       /* it wrongly skipped all the entries and filled zero */
> +                       if (stack_p[0] == 0)
> +                               failed = 1;
> +               }
> +       } else if ((int)key == -14/*EFAULT*/) {
> +               /* old kernel doesn't support skipping that many entries */
> +               failed = 2;
> +       }
> +
> +       return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.35.1.723.g4982287a31-goog
>


* Re: [PATCH 2/2] bpf/selftests: Test skipping stacktrace
  2022-03-11 22:23   ` Andrii Nakryiko
@ 2022-03-14 17:28     ` Namhyung Kim
  0 siblings, 0 replies; 10+ messages in thread
From: Namhyung Kim @ 2022-03-14 17:28 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Networking, bpf, LKML, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Eugene Loh, Hao Luo

Hello,

On Fri, Mar 11, 2022 at 2:23 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Mar 10, 2022 at 12:22 AM Namhyung Kim <namhyung@kernel.org> wrote:
> > +SEC("tracepoint/sched/sched_switch")
> > +int oncpu(struct sched_switch_args *ctx)
> > +{
> > +       __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
> > +       __u32 key = 0, val = 0, *value_p;
> > +       __u64 *stack_p;
> > +
>
> please also add filtering by PID to avoid interference from other
> selftests when run in parallel mode

Will do!

Thanks,
Namhyung

>
> > +       value_p = bpf_map_lookup_elem(&control_map, &key);
> > +       if (value_p && *value_p)
> > +               return 0; /* skip if non-zero *value_p */
> > +
> > +       /* it should allow skipping whole buffer size entries */
> > +       key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
> > +       if ((int)key >= 0) {
> > +               /* The size of stackmap and stack_amap should be the same */
> > +               bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
> > +               stack_p = bpf_map_lookup_elem(&stack_amap, &key);
> > +               if (stack_p) {
> > +                       bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
> > +                       /* it wrongly skipped all the entries and filled zero */
> > +                       if (stack_p[0] == 0)
> > +                               failed = 1;
> > +               }
> > +       } else if ((int)key == -14/*EFAULT*/) {
> > +               /* old kernel doesn't support skipping that many entries */
> > +               failed = 2;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
> > --
> > 2.35.1.723.g4982287a31-goog
> >

