netdev.vger.kernel.org archive mirror
* [PATCH bpf-next 0/3] bpf: introduce bpf_get_task_stack_trace()
@ 2020-06-23  7:07 Song Liu
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23  7:07 UTC (permalink / raw)
  To: bpf, netdev; +Cc: ast, daniel, kernel-team, john.fastabend, kpsingh, Song Liu

This set introduces a new helper, bpf_get_task_stack_trace(). The primary
use case is to dump all /proc/*/stack to a seq_file via bpf_iter__task.
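
For context, a minimal sketch of how the iterator output could be consumed
from user space with libbpf (error handling omitted; the skeleton and
program names are the ones added in patch 3 of this set):

	#include <stdio.h>
	#include <unistd.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>
	#include "bpf_iter_task_stack.skel.h"

	int main(void)
	{
		struct bpf_iter_task_stack *skel;
		struct bpf_link *link;
		char buf[4096];
		ssize_t n;
		int iter_fd;

		skel = bpf_iter_task_stack__open_and_load();
		link = bpf_program__attach_iter(skel->progs.dump_task_stack, NULL);
		iter_fd = bpf_iter_create(bpf_link__fd(link));

		/* each read() runs the iterator over tasks and returns the
		 * seq_file output produced by the BPF program */
		while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);

		close(iter_fd);
		bpf_link__destroy(link);
		bpf_iter_task_stack__destroy(skel);
		return 0;
	}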

Song Liu (3):
  bpf: introduce helper bpf_get_task_stack_trace()
  bpf: allow %pB in bpf_seq_printf()
  selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()

 include/uapi/linux/bpf.h                      | 10 +++-
 kernel/trace/bpf_trace.c                      | 24 ++++++++-
 scripts/bpf_helpers_doc.py                    |  2 +
 tools/include/uapi/linux/bpf.h                | 10 +++-
 .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
 .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
 6 files changed, 110 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c

--
2.24.1


* [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23  7:07 [PATCH bpf-next 0/3] bpf: introduce bpf_get_task_stack_trace() Song Liu
@ 2020-06-23  7:08 ` Song Liu
  2020-06-23 15:19   ` Alexei Starovoitov
                     ` (2 more replies)
  2020-06-23  7:08 ` [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf() Song Liu
  2020-06-23  7:08 ` [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace() Song Liu
  2 siblings, 3 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23  7:08 UTC (permalink / raw)
  To: bpf, netdev; +Cc: ast, daniel, kernel-team, john.fastabend, kpsingh, Song Liu

This helper can be used with bpf_iter__task to dump all /proc/*/stack to
a seq_file.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/uapi/linux/bpf.h       | 10 +++++++++-
 kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 10 +++++++++-
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 19684813faaed..a30416b822fe3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3252,6 +3252,13 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
+ *	Description
+ *		Save a task stack trace into array *entries*. This is a wrapper
+ *		over stack_trace_save_tsk().
+ *	Return
+ *		Number of trace entries stored.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3396,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(get_task_stack_trace),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e729c9e587a07..2c13bcb5c2bce 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
 	.arg4_type	= ARG_ANYTHING,
 };

+BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
+	   void *, entries, u32, size)
+{
+	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
+}
+
+static int bpf_get_task_stack_trace_btf_ids[5];
+static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
+	.func		= bpf_get_task_stack_trace,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_get_task_stack_trace_btf_ids,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
 		       &bpf_seq_write_proto :
 		       NULL;
+	case BPF_FUNC_get_task_stack_trace:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+			&bpf_get_task_stack_trace_proto :
+			NULL;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 91fa668fa8602..a8783d668c5b7 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -421,6 +421,7 @@ class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct task_struct',

             'struct __sk_buff',
             'struct sk_msg_md',
@@ -458,6 +459,7 @@ class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct task_struct',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 19684813faaed..a30416b822fe3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3252,6 +3252,13 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
+ *	Description
+ *		Save a task stack trace into array *entries*. This is a wrapper
+ *		over stack_trace_save_tsk().
+ *	Return
+ *		Number of trace entries stored.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3396,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(get_task_stack_trace),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
--
2.24.1


* [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf()
  2020-06-23  7:07 [PATCH bpf-next 0/3] bpf: introduce bpf_get_task_stack_trace() Song Liu
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
@ 2020-06-23  7:08 ` Song Liu
  2020-06-23 15:29   ` Daniel Borkmann
  2020-06-23  7:08 ` [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace() Song Liu
  2 siblings, 1 reply; 19+ messages in thread
From: Song Liu @ 2020-06-23  7:08 UTC (permalink / raw)
  To: bpf, netdev; +Cc: ast, daniel, kernel-team, john.fastabend, kpsingh, Song Liu

This makes it easy to dump stack trace with bpf_seq_printf().
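
For example (the output line is illustrative only; actual symbol names and
offsets will vary):

	BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)addr);
	/* prints something like: [<0>] worker_thread+0xc5/0x3c0 */

%pB resolves a kernel address to symbol+offset/size like %pS, but biases
the lookup for return addresses, which is what stack dumps want.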

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/trace/bpf_trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2c13bcb5c2bce..ced3176801ae8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -636,7 +636,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 		if (fmt[i] == 'p') {
 			if (fmt[i + 1] == 0 ||
 			    fmt[i + 1] == 'K' ||
-			    fmt[i + 1] == 'x') {
+			    fmt[i + 1] == 'x' ||
+			    fmt[i + 1] == 'B') {
 				/* just kernel pointers */
 				params[fmt_cnt] = args[fmt_cnt];
 				fmt_cnt++;
-- 
2.24.1



* [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-23  7:07 [PATCH bpf-next 0/3] bpf: introduce bpf_get_task_stack_trace() Song Liu
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
  2020-06-23  7:08 ` [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf() Song Liu
@ 2020-06-23  7:08 ` Song Liu
  2020-06-23 18:57   ` Yonghong Song
  2 siblings, 1 reply; 19+ messages in thread
From: Song Liu @ 2020-06-23  7:08 UTC (permalink / raw)
  To: bpf, netdev; +Cc: ast, daniel, kernel-team, john.fastabend, kpsingh, Song Liu

The new test is similar to other bpf_iter tests.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
 .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf96..baa83328f810d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,6 +5,7 @@
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
@@ -106,6 +107,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_stack(void)
+{
+	struct bpf_iter_task_stack *skel;
+
+	skel = bpf_iter_task_stack__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_stack);
+
+	bpf_iter_task_stack__destroy(skel);
+}
+
 static void test_task_file(void)
 {
 	struct bpf_iter_task_file *skel;
@@ -392,6 +407,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_stack"))
+		test_task_stack();
 	if (test__start_subtest("task_file"))
 		test_task_file();
 	if (test__start_subtest("anon"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 0000000000000..4fc939e0fca77
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+#define MAX_STACK_TRACE_DEPTH   64
+unsigned long entries[MAX_STACK_TRACE_DEPTH];
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	unsigned int i, num_entries;
+
+	if (task == (void *)0)
+		return 0;
+
+	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
+
+	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
+
+	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+		if (num_entries > i)
+			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+	}
+
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
-- 
2.24.1



* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
@ 2020-06-23 15:19   ` Alexei Starovoitov
  2020-06-23 16:59     ` Song Liu
  2020-06-23 15:22   ` Daniel Borkmann
  2020-06-23 18:45   ` Andrii Nakryiko
  2 siblings, 1 reply; 19+ messages in thread
From: Alexei Starovoitov @ 2020-06-23 15:19 UTC (permalink / raw)
  To: Song Liu
  Cc: bpf, Network Development, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, John Fastabend, KP Singh

On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/uapi/linux/bpf.h       | 10 +++++++++-
>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 ++
>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>  4 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>   *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>   *             is returned or the error code -EACCES in case the skb is not
>   *             subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().
> + *     Return
> + *             Number of trace entries stored.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
>         FN(ringbuf_query),              \
> -       FN(csum_level),
> +       FN(csum_level),                 \
> +       FN(get_task_stack_trace),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +          void *, entries, u32, size)
> +{
> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +       .func           = bpf_get_task_stack_trace,
> +       .gpl_only       = true,

why?

> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,

OR_ZERO ? why?

> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> +};
> +
>  static const struct bpf_func_proto *
>  raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  {
> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return prog->expected_attach_type == BPF_TRACE_ITER ?
>                        &bpf_seq_write_proto :
>                        NULL;
> +       case BPF_FUNC_get_task_stack_trace:
> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
> +                       &bpf_get_task_stack_trace_proto :

why limit to iter only?

> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().

size is not documented and looks wrong.
the verifier checks it in bytes, but it's consumed as number of u32s.
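
For reference, the consumer's prototype (paraphrased from
include/linux/stacktrace.h) takes a count of entries, not bytes:

	unsigned int stack_trace_save_tsk(struct task_struct *task,
					  unsigned long *store,
					  unsigned int size,	/* nr of entries */
					  unsigned int skipnr);

so passing the verifier-checked byte count straight through would let the
callee write size * sizeof(long) bytes into a size-byte buffer.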


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
  2020-06-23 15:19   ` Alexei Starovoitov
@ 2020-06-23 15:22   ` Daniel Borkmann
  2020-06-23 17:19     ` Song Liu
  2020-06-23 18:45   ` Andrii Nakryiko
  2 siblings, 1 reply; 19+ messages in thread
From: Daniel Borkmann @ 2020-06-23 15:22 UTC (permalink / raw)
  To: Song Liu, bpf, netdev; +Cc: ast, kernel-team, john.fastabend, kpsingh

On 6/23/20 9:08 AM, Song Liu wrote:
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>   include/uapi/linux/bpf.h       | 10 +++++++++-
>   kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>   scripts/bpf_helpers_doc.py     |  2 ++
>   tools/include/uapi/linux/bpf.h | 10 +++++++++-
>   4 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>    * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>    * 		is returned or the error code -EACCES in case the skb is not
>    * 		subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *	Description
> + *		Save a task stack trace into array *entries*. This is a wrapper
> + *		over stack_trace_save_tsk().
> + *	Return
> + *		Number of trace entries stored.
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>   	FN(ringbuf_submit),		\
>   	FN(ringbuf_discard),		\
>   	FN(ringbuf_query),		\
> -	FN(csum_level),
> +	FN(csum_level),			\
> +	FN(get_task_stack_trace),
> 
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>    * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>   	.arg4_type	= ARG_ANYTHING,
>   };
> 
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +	   void *, entries, u32, size)
> +{
> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);

nit: cast not needed.

> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +	.func		= bpf_get_task_stack_trace,
> +	.gpl_only	= true,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_BTF_ID,
> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,

Is there a use-case to pass in entries == NULL + size == 0?

> +	.btf_id		= bpf_get_task_stack_trace_btf_ids,
> +};
> +


* Re: [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf()
  2020-06-23  7:08 ` [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf() Song Liu
@ 2020-06-23 15:29   ` Daniel Borkmann
  2020-06-23 17:19     ` Song Liu
  0 siblings, 1 reply; 19+ messages in thread
From: Daniel Borkmann @ 2020-06-23 15:29 UTC (permalink / raw)
  To: Song Liu, bpf, netdev; +Cc: ast, kernel-team, john.fastabend, kpsingh

On 6/23/20 9:08 AM, Song Liu wrote:
> This makes it easy to dump stack trace with bpf_seq_printf().
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>   kernel/trace/bpf_trace.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 2c13bcb5c2bce..ced3176801ae8 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -636,7 +636,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
>   		if (fmt[i] == 'p') {
>   			if (fmt[i + 1] == 0 ||
>   			    fmt[i + 1] == 'K' ||
> -			    fmt[i + 1] == 'x') {
> +			    fmt[i + 1] == 'x' ||
> +			    fmt[i + 1] == 'B') {
>   				/* just kernel pointers */
>   				params[fmt_cnt] = args[fmt_cnt];
>   				fmt_cnt++;
> 

Why only bpf_seq_printf(), what about bpf_trace_printk()?


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23 15:19   ` Alexei Starovoitov
@ 2020-06-23 16:59     ` Song Liu
  2020-06-23 17:40       ` Song Liu
  2020-06-23 18:41       ` Andrii Nakryiko
  0 siblings, 2 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23 16:59 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: bpf, Network Development, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, John Fastabend, KP Singh



> On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>> 

[...]

>> 
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +          void *, entries, u32, size)
>> +{
>> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>> +}
>> +
>> +static int bpf_get_task_stack_trace_btf_ids[5];
>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>> +       .func           = bpf_get_task_stack_trace,
>> +       .gpl_only       = true,
> 
> why?

Actually, I am not sure when we should use gpl_only = true. 

> 
>> +       .ret_type       = RET_INTEGER,
>> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
>> +       .arg2_type      = ARG_PTR_TO_MEM,
>> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> 
> OR_ZERO ? why?

Will fix. 

> 
>> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
>> +};
>> +
>> static const struct bpf_func_proto *
>> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>> {
>> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>                return prog->expected_attach_type == BPF_TRACE_ITER ?
>>                       &bpf_seq_write_proto :
>>                       NULL;
>> +       case BPF_FUNC_get_task_stack_trace:
>> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
>> +                       &bpf_get_task_stack_trace_proto :
> 
> why limit to iter only?

I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?
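
Something like this in bpf_tracing_func_proto() (just a sketch of the move):

	case BPF_FUNC_get_task_stack_trace:
		return &bpf_get_task_stack_trace_proto;

which would make the helper available to kprobe/tracepoint/perf_event
programs as well.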

> 
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *     Description
>> + *             Save a task stack trace into array *entries*. This is a wrapper
>> + *             over stack_trace_save_tsk().
> 
> size is not documented and looks wrong.
> the verifier checks it in bytes, but it's consumed as number of u32s.

I am not 100% sure, but the verifier seems to check it correctly. And I think
it is consumed as u64s?

Thanks,
Song



* Re: [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf()
  2020-06-23 15:29   ` Daniel Borkmann
@ 2020-06-23 17:19     ` Song Liu
  0 siblings, 0 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23 17:19 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: bpf, netdev, ast, Kernel Team, john.fastabend, kpsingh



> On Jun 23, 2020, at 8:29 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> On 6/23/20 9:08 AM, Song Liu wrote:
>> This makes it easy to dump stack trace with bpf_seq_printf().
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>>  kernel/trace/bpf_trace.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index 2c13bcb5c2bce..ced3176801ae8 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -636,7 +636,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
>>  		if (fmt[i] == 'p') {
>>  			if (fmt[i + 1] == 0 ||
>>  			    fmt[i + 1] == 'K' ||
>> -			    fmt[i + 1] == 'x') {
>> +			    fmt[i + 1] == 'x' ||
>> +			    fmt[i + 1] == 'B') {
>>  				/* just kernel pointers */
>>  				params[fmt_cnt] = args[fmt_cnt];
>>  				fmt_cnt++;
> 
> Why only bpf_seq_printf(), what about bpf_trace_printk()?

The use case we are looking at needs bpf_seq_printf(). Let me also add it to
bpf_trace_printk(). 

Thanks,
Song


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23 15:22   ` Daniel Borkmann
@ 2020-06-23 17:19     ` Song Liu
  0 siblings, 0 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23 17:19 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: bpf, netdev, ast, Kernel Team, john.fastabend, kpsingh



> On Jun 23, 2020, at 8:22 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> On 6/23/20 9:08 AM, Song Liu wrote:
>> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
>> a seq_file.
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>>  include/uapi/linux/bpf.h       | 10 +++++++++-
>>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>>  scripts/bpf_helpers_doc.py     |  2 ++
>>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>>  4 files changed, 41 insertions(+), 2 deletions(-)
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 19684813faaed..a30416b822fe3 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3252,6 +3252,13 @@ union bpf_attr {
>>   * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>>   * 		is returned or the error code -EACCES in case the skb is not
>>   * 		subject to CHECKSUM_UNNECESSARY.
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *	Description
>> + *		Save a task stack trace into array *entries*. This is a wrapper
>> + *		over stack_trace_save_tsk().
>> + *	Return
>> + *		Number of trace entries stored.
>>   */
>>  #define __BPF_FUNC_MAPPER(FN)		\
>>  	FN(unspec),			\
>> @@ -3389,7 +3396,8 @@ union bpf_attr {
>>  	FN(ringbuf_submit),		\
>>  	FN(ringbuf_discard),		\
>>  	FN(ringbuf_query),		\
>> -	FN(csum_level),
>> +	FN(csum_level),			\
>> +	FN(get_task_stack_trace),
>>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>   * function eBPF program intends to call
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index e729c9e587a07..2c13bcb5c2bce 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>>  	.arg4_type	= ARG_ANYTHING,
>>  };
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +	   void *, entries, u32, size)
>> +{
>> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> 
> nit: cast not needed.

Will fix. 

> 
>> +}
>> +
>> +static int bpf_get_task_stack_trace_btf_ids[5];
>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>> +	.func		= bpf_get_task_stack_trace,
>> +	.gpl_only	= true,
>> +	.ret_type	= RET_INTEGER,
>> +	.arg1_type	= ARG_PTR_TO_BTF_ID,
>> +	.arg2_type	= ARG_PTR_TO_MEM,
>> +	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
> 
> Is there a use-case to pass in entries == NULL + size == 0?

Not really. Will fix. 


> 
>> +	.btf_id		= bpf_get_task_stack_trace_btf_ids,
>> +};
>> +



* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23 16:59     ` Song Liu
@ 2020-06-23 17:40       ` Song Liu
  2020-06-23 18:41       ` Andrii Nakryiko
  1 sibling, 0 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23 17:40 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: bpf, Network Development, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, John Fastabend, KP Singh



> On Jun 23, 2020, at 9:59 AM, Song Liu <songliubraving@fb.com> wrote:
> 
> 
> 
>> On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>> 
>> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>>> 
> 
> [...]
> 
>>> 
>>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>>> +          void *, entries, u32, size)
>>> +{
>>> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>>> +}
>>> +
>>> +static int bpf_get_task_stack_trace_btf_ids[5];
>>> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
>>> +       .func           = bpf_get_task_stack_trace,
>>> +       .gpl_only       = true,
>> 
>> why?
> 
> Actually, I am not sure when we should use gpl_only = true. 
> 
>> 
>>> +       .ret_type       = RET_INTEGER,
>>> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
>>> +       .arg2_type      = ARG_PTR_TO_MEM,
>>> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
>> 
>> OR_ZERO ? why?
> 
> Will fix. 
> 
>> 
>>> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
>>> +};
>>> +
>>> static const struct bpf_func_proto *
>>> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>> {
>>> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>>               return prog->expected_attach_type == BPF_TRACE_ITER ?
>>>                      &bpf_seq_write_proto :
>>>                      NULL;
>>> +       case BPF_FUNC_get_task_stack_trace:
>>> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
>>> +                       &bpf_get_task_stack_trace_proto :
>> 
>> why limit to iter only?
> 
> I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?
> 
>> 
>>> + *
>>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>>> + *     Description
>>> + *             Save a task stack trace into array *entries*. This is a wrapper
>>> + *             over stack_trace_save_tsk().
>> 
>> size is not documented and looks wrong.
>> the verifier checks it in bytes, but it's consumed as number of u32s.
> 
> I am not 100% sure, but verifier seems check it correctly. And I think it is consumed
> as u64s?

I was wrong. Verifier checks as bytes. Will fix. 

Song


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23 16:59     ` Song Liu
  2020-06-23 17:40       ` Song Liu
@ 2020-06-23 18:41       ` Andrii Nakryiko
  1 sibling, 0 replies; 19+ messages in thread
From: Andrii Nakryiko @ 2020-06-23 18:41 UTC (permalink / raw)
  To: Song Liu
  Cc: Alexei Starovoitov, bpf, Network Development, Alexei Starovoitov,
	Daniel Borkmann, Kernel Team, John Fastabend, KP Singh

On Tue, Jun 23, 2020 at 10:00 AM Song Liu <songliubraving@fb.com> wrote:
>
>
>
> > On Jun 23, 2020, at 8:19 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
> >>
>
> [...]
>
> >>
> >> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> >> +          void *, entries, u32, size)
> >> +{
> >> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> >> +}
> >> +
> >> +static int bpf_get_task_stack_trace_btf_ids[5];
> >> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> >> +       .func           = bpf_get_task_stack_trace,
> >> +       .gpl_only       = true,
> >
> > why?
>
> Actually, I am not sure when we should use gpl_only = true.
>
> >
> >> +       .ret_type       = RET_INTEGER,
> >> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> >> +       .arg2_type      = ARG_PTR_TO_MEM,
> >> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> >
> > OR_ZERO ? why?
>
> Will fix.

I actually think it's a good idea, because it makes writing code that
uses variable-sized buffers easier. Remember how we had
bpf_perf_event_output() forcing size > 0? That was a major PITA and
required unnecessary code gymnastics to prove to the verifier it's OK (even
if zero size was never possible). Yonghong eventually fixed that to be
_OR_ZERO.

So if this is not causing any problems, please leave it as _OR_ZERO.
Thank you from everyone who had to suffer through dealing with
anything variable-sized in BPF!
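
A rough illustration of those gymnastics (hypothetical tracing program
fragment; the 'events' map and the length source are made up):

	char buf[256];
	__u32 len = get_dyn_len();	/* hypothetical, verifier range [0, 256] */

	if (len > sizeof(buf))
		return 0;
	/* With ARG_CONST_SIZE the call below is rejected unless the program
	 * also proves len != 0, e.g. an extra "if (len == 0) return 0;".
	 * With ARG_CONST_SIZE_OR_ZERO the bounds check above suffices. */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, buf, len);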

>
> >
> >> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> >> +};
> >> +
> >> static const struct bpf_func_proto *
> >> raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >> {
> >> @@ -1521,6 +1538,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >>                return prog->expected_attach_type == BPF_TRACE_ITER ?
> >>                       &bpf_seq_write_proto :
> >>                       NULL;
> >> +       case BPF_FUNC_get_task_stack_trace:
> >> +               return prog->expected_attach_type == BPF_TRACE_ITER ?
> >> +                       &bpf_get_task_stack_trace_proto :
> >
> > why limit to iter only?
>
> I guess it is also useful for other types. Maybe move to bpf_tracing_func_proto()?
>
> >
> >> + *
> >> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> >> + *     Description
> >> + *             Save a task stack trace into array *entries*. This is a wrapper
> >> + *             over stack_trace_save_tsk().
> >
> > size is not documented and looks wrong.
> > the verifier checks it in bytes, but it's consumed as number of u32s.
>
> I am not 100% sure, but the verifier seems to check it correctly. And I think
> it is consumed as u64s?
>
> Thanks,
> Song
>


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
  2020-06-23 15:19   ` Alexei Starovoitov
  2020-06-23 15:22   ` Daniel Borkmann
@ 2020-06-23 18:45   ` Andrii Nakryiko
  2020-06-23 22:53     ` Song Liu
  2 siblings, 1 reply; 19+ messages in thread
From: Andrii Nakryiko @ 2020-06-23 18:45 UTC (permalink / raw)
  To: Song Liu
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, john fastabend, KP Singh

On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>
> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
> a seq_file.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  include/uapi/linux/bpf.h       | 10 +++++++++-
>  kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 ++
>  tools/include/uapi/linux/bpf.h | 10 +++++++++-
>  4 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 19684813faaed..a30416b822fe3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3252,6 +3252,13 @@ union bpf_attr {
>   *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>   *             is returned or the error code -EACCES in case the skb is not
>   *             subject to CHECKSUM_UNNECESSARY.
> + *
> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
> + *     Description
> + *             Save a task stack trace into array *entries*. This is a wrapper
> + *             over stack_trace_save_tsk().
> + *     Return
> + *             Number of trace entries stored.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -3389,7 +3396,8 @@ union bpf_attr {
>         FN(ringbuf_submit),             \
>         FN(ringbuf_discard),            \
>         FN(ringbuf_query),              \
> -       FN(csum_level),
> +       FN(csum_level),                 \
> +       FN(get_task_stack_trace),

We have get_stackid and get_stack, I think to stay consistent it
should be named get_task_stack

>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e729c9e587a07..2c13bcb5c2bce 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
> +          void *, entries, u32, size)

See get_stack definition, this one needs to support flags as well. And
we should probably support BPF_F_USER_BUILD_ID as well. And
BPF_F_USER_STACK is also a good idea, I presume?
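
A sketch of that four-argument shape, mirroring bpf_get_stack() (the helper
name and the byte-to-entry conversion are assumptions, and the actual
BPF_F_USER_STACK/BPF_F_USER_BUILD_ID handling is left out):

	BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task,
		   void *, buf, u32, size, u64, flags)
	{
		if (flags & ~(BPF_F_USER_STACK | BPF_F_USER_BUILD_ID))
			return -EINVAL;

		/* size is in bytes; the callee wants a count of entries */
		return stack_trace_save_tsk(task, buf,
					    size / sizeof(unsigned long), 0);
	}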

> +{
> +       return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> +}
> +
> +static int bpf_get_task_stack_trace_btf_ids[5];
> +static const struct bpf_func_proto bpf_get_task_stack_trace_proto = {
> +       .func           = bpf_get_task_stack_trace,
> +       .gpl_only       = true,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_BTF_ID,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
> +       .btf_id         = bpf_get_task_stack_trace_btf_ids,
> +};
> +

[...]


* Re: [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-23  7:08 ` [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace() Song Liu
@ 2020-06-23 18:57   ` Yonghong Song
  2020-06-23 22:07     ` Song Liu
  0 siblings, 1 reply; 19+ messages in thread
From: Yonghong Song @ 2020-06-23 18:57 UTC (permalink / raw)
  To: Song Liu, bpf, netdev; +Cc: ast, daniel, kernel-team, john.fastabend, kpsingh



On 6/23/20 12:08 AM, Song Liu wrote:
> The new test is similar to other bpf_iter tests.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
>   .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
>   2 files changed, 67 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 87c29dde1cf96..baa83328f810d 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -5,6 +5,7 @@
>   #include "bpf_iter_netlink.skel.h"
>   #include "bpf_iter_bpf_map.skel.h"
>   #include "bpf_iter_task.skel.h"
> +#include "bpf_iter_task_stack.skel.h"
>   #include "bpf_iter_task_file.skel.h"
>   #include "bpf_iter_test_kern1.skel.h"
>   #include "bpf_iter_test_kern2.skel.h"
> @@ -106,6 +107,20 @@ static void test_task(void)
>   	bpf_iter_task__destroy(skel);
>   }
>   
> +static void test_task_stack(void)
> +{
> +	struct bpf_iter_task_stack *skel;
> +
> +	skel = bpf_iter_task_stack__open_and_load();
> +	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
> +		  "skeleton open_and_load failed\n"))
> +		return;
> +
> +	do_dummy_read(skel->progs.dump_task_stack);
> +
> +	bpf_iter_task_stack__destroy(skel);
> +}
> +
>   static void test_task_file(void)
>   {
>   	struct bpf_iter_task_file *skel;
> @@ -392,6 +407,8 @@ void test_bpf_iter(void)
>   		test_bpf_map();
>   	if (test__start_subtest("task"))
>   		test_task();
> +	if (test__start_subtest("task_stack"))
> +		test_task_stack();
>   	if (test__start_subtest("task_file"))
>   		test_task_file();
>   	if (test__start_subtest("anon"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
> new file mode 100644
> index 0000000000000..4fc939e0fca77
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
> @@ -0,0 +1,50 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2020 Facebook */
> +/* "undefine" structs in vmlinux.h, because we "override" them below */
> +#define bpf_iter_meta bpf_iter_meta___not_used
> +#define bpf_iter__task bpf_iter__task___not_used
> +#include "vmlinux.h"
> +#undef bpf_iter_meta
> +#undef bpf_iter__task
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +struct bpf_iter_meta {
> +	struct seq_file *seq;
> +	__u64 session_id;
> +	__u64 seq_num;
> +} __attribute__((preserve_access_index));
> +
> +struct bpf_iter__task {
> +	struct bpf_iter_meta *meta;
> +	struct task_struct *task;
> +} __attribute__((preserve_access_index));
> +
> +#define MAX_STACK_TRACE_DEPTH   64
> +unsigned long entries[MAX_STACK_TRACE_DEPTH];
> +
> +SEC("iter/task")
> +int dump_task_stack(struct bpf_iter__task *ctx)
> +{
> +	struct seq_file *seq = ctx->meta->seq;
> +	struct task_struct *task = ctx->task;
> +	unsigned int i, num_entries;
> +
> +	if (task == (void *)0)
> +		return 0;
> +
> +	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
> +
> +	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
> +
> +	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
> +		if (num_entries > i)
> +			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);

We may have an issue on 32-bit systems.
On a 32-bit system, the following is called in the kernel:
+	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
It will pack addresses at 4-byte increments,
but the BPF program reads them at 8-byte increments.

If "entries" is handled in user space, user space can just use a
"long *" pointer and will be able to correctly retrieve the stack addresses.

Maybe add some comments to clarify this prog only works on 64-bit systems.

> +	}
> +
> +	BPF_SEQ_PRINTF(seq, "\n");
> +
> +	return 0;
> +}
> 


* Re: [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-23 18:57   ` Yonghong Song
@ 2020-06-23 22:07     ` Song Liu
  2020-06-23 22:27       ` Yonghong Song
  0 siblings, 1 reply; 19+ messages in thread
From: Song Liu @ 2020-06-23 22:07 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, netdev, ast, daniel, Kernel Team, john.fastabend, kpsingh



> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote:
> 
> 
> 
> On 6/23/20 12:08 AM, Song Liu wrote:
>> The new test is similar to other bpf_iter tests.
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>>  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
>>  .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
>>  2 files changed, 67 insertions(+)
>>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>> index 87c29dde1cf96..baa83328f810d 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>> @@ -5,6 +5,7 @@
>>  #include "bpf_iter_netlink.skel.h"
>>  #include "bpf_iter_bpf_map.skel.h"
>>  #include "bpf_iter_task.skel.h"
>> +#include "bpf_iter_task_stack.skel.h"
>>  #include "bpf_iter_task_file.skel.h"
>>  #include "bpf_iter_test_kern1.skel.h"
>>  #include "bpf_iter_test_kern2.skel.h"
>> @@ -106,6 +107,20 @@ static void test_task(void)
>>  	bpf_iter_task__destroy(skel);
>>  }
>>  +static void test_task_stack(void)
>> +{
>> +	struct bpf_iter_task_stack *skel;
>> +
>> +	skel = bpf_iter_task_stack__open_and_load();
>> +	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
>> +		  "skeleton open_and_load failed\n"))
>> +		return;
>> +
>> +	do_dummy_read(skel->progs.dump_task_stack);
>> +
>> +	bpf_iter_task_stack__destroy(skel);
>> +}
>> +
>>  static void test_task_file(void)
>>  {
>>  	struct bpf_iter_task_file *skel;
>> @@ -392,6 +407,8 @@ void test_bpf_iter(void)
>>  		test_bpf_map();
>>  	if (test__start_subtest("task"))
>>  		test_task();
>> +	if (test__start_subtest("task_stack"))
>> +		test_task_stack();
>>  	if (test__start_subtest("task_file"))
>>  		test_task_file();
>>  	if (test__start_subtest("anon"))
>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>> new file mode 100644
>> index 0000000000000..4fc939e0fca77
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>> @@ -0,0 +1,50 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (c) 2020 Facebook */
>> +/* "undefine" structs in vmlinux.h, because we "override" them below */
>> +#define bpf_iter_meta bpf_iter_meta___not_used
>> +#define bpf_iter__task bpf_iter__task___not_used
>> +#include "vmlinux.h"
>> +#undef bpf_iter_meta
>> +#undef bpf_iter__task
>> +#include <bpf/bpf_helpers.h>
>> +#include <bpf/bpf_tracing.h>
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +struct bpf_iter_meta {
>> +	struct seq_file *seq;
>> +	__u64 session_id;
>> +	__u64 seq_num;
>> +} __attribute__((preserve_access_index));
>> +
>> +struct bpf_iter__task {
>> +	struct bpf_iter_meta *meta;
>> +	struct task_struct *task;
>> +} __attribute__((preserve_access_index));
>> +
>> +#define MAX_STACK_TRACE_DEPTH   64
>> +unsigned long entries[MAX_STACK_TRACE_DEPTH];
>> +
>> +SEC("iter/task")
>> +int dump_task_stack(struct bpf_iter__task *ctx)
>> +{
>> +	struct seq_file *seq = ctx->meta->seq;
>> +	struct task_struct *task = ctx->task;
>> +	unsigned int i, num_entries;
>> +
>> +	if (task == (void *)0)
>> +		return 0;
>> +
>> +	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
>> +
>> +	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
>> +
>> +	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
>> +		if (num_entries > i)
>> +			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
> 
> We may have an issue on 32-bit systems.
> On a 32-bit system, the following is called in the kernel:
> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
> It will pack addresses at 4-byte increments,
> but the BPF program reads them at 8-byte increments.

Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down
size to the closest multiple of 8?
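
For instance (sketch):

	if (size & 7)		/* require a multiple of 8 bytes */
		return -EINVAL;

or, rounding down instead:

	size &= ~(u32)7;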

Thanks,
Song


* Re: [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-23 22:07     ` Song Liu
@ 2020-06-23 22:27       ` Yonghong Song
  2020-06-24 20:37         ` Song Liu
  0 siblings, 1 reply; 19+ messages in thread
From: Yonghong Song @ 2020-06-23 22:27 UTC (permalink / raw)
  To: Song Liu; +Cc: bpf, netdev, ast, daniel, Kernel Team, john.fastabend, kpsingh



On 6/23/20 3:07 PM, Song Liu wrote:
> 
> 
>> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 6/23/20 12:08 AM, Song Liu wrote:
>>> The new test is similar to other bpf_iter tests.
>>> Signed-off-by: Song Liu <songliubraving@fb.com>
>>> ---
>>>   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
>>>   .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
>>>   2 files changed, 67 insertions(+)
>>>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> index 87c29dde1cf96..baa83328f810d 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> @@ -5,6 +5,7 @@
>>>   #include "bpf_iter_netlink.skel.h"
>>>   #include "bpf_iter_bpf_map.skel.h"
>>>   #include "bpf_iter_task.skel.h"
>>> +#include "bpf_iter_task_stack.skel.h"
>>>   #include "bpf_iter_task_file.skel.h"
>>>   #include "bpf_iter_test_kern1.skel.h"
>>>   #include "bpf_iter_test_kern2.skel.h"
>>> @@ -106,6 +107,20 @@ static void test_task(void)
>>>   	bpf_iter_task__destroy(skel);
>>>   }
>>>   +static void test_task_stack(void)
>>> +{
>>> +	struct bpf_iter_task_stack *skel;
>>> +
>>> +	skel = bpf_iter_task_stack__open_and_load();
>>> +	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
>>> +		  "skeleton open_and_load failed\n"))
>>> +		return;
>>> +
>>> +	do_dummy_read(skel->progs.dump_task_stack);
>>> +
>>> +	bpf_iter_task_stack__destroy(skel);
>>> +}
>>> +
>>>   static void test_task_file(void)
>>>   {
>>>   	struct bpf_iter_task_file *skel;
>>> @@ -392,6 +407,8 @@ void test_bpf_iter(void)
>>>   		test_bpf_map();
>>>   	if (test__start_subtest("task"))
>>>   		test_task();
>>> +	if (test__start_subtest("task_stack"))
>>> +		test_task_stack();
>>>   	if (test__start_subtest("task_file"))
>>>   		test_task_file();
>>>   	if (test__start_subtest("anon"))
>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>> new file mode 100644
>>> index 0000000000000..4fc939e0fca77
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>> @@ -0,0 +1,50 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/* Copyright (c) 2020 Facebook */
>>> +/* "undefine" structs in vmlinux.h, because we "override" them below */
>>> +#define bpf_iter_meta bpf_iter_meta___not_used
>>> +#define bpf_iter__task bpf_iter__task___not_used
>>> +#include "vmlinux.h"
>>> +#undef bpf_iter_meta
>>> +#undef bpf_iter__task
>>> +#include <bpf/bpf_helpers.h>
>>> +#include <bpf/bpf_tracing.h>
>>> +
>>> +char _license[] SEC("license") = "GPL";
>>> +
>>> +struct bpf_iter_meta {
>>> +	struct seq_file *seq;
>>> +	__u64 session_id;
>>> +	__u64 seq_num;
>>> +} __attribute__((preserve_access_index));
>>> +
>>> +struct bpf_iter__task {
>>> +	struct bpf_iter_meta *meta;
>>> +	struct task_struct *task;
>>> +} __attribute__((preserve_access_index));
>>> +
>>> +#define MAX_STACK_TRACE_DEPTH   64
>>> +unsigned long entries[MAX_STACK_TRACE_DEPTH];
>>> +
>>> +SEC("iter/task")
>>> +int dump_task_stack(struct bpf_iter__task *ctx)
>>> +{
>>> +	struct seq_file *seq = ctx->meta->seq;
>>> +	struct task_struct *task = ctx->task;
>>> +	unsigned int i, num_entries;
>>> +
>>> +	if (task == (void *)0)
>>> +		return 0;
>>> +
>>> +	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
>>> +
>>> +	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
>>> +
>>> +	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
>>> +		if (num_entries > i)
>>> +			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
>>
>> We may have an issue on 32-bit systems.
>> On a 32-bit system, the following is called in the kernel:
>> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>> It will pack addresses at 4-byte increments,
>> but the BPF program reads them at 8-byte increments.
> 
> Can we avoid potential issues by requiring size % 8 == 0? Or maybe round down
> size to the closest multiple of 8?

This is what I mean:
   for the bpf program: "long" means u64, so we allocate a 64 * 8 byte buffer
                    and pass it to the helper
   in the helper, the address is advanced by sizeof(long), which
                  is 4 on a 32-bit system.
           So addresses are recorded at buf, buf + 4, buf + 8, buf + 12, ...
   After the helper returns, the bpf program tries to retrieve
           the addresses at buf, buf + 8, buf + 16, ...

The helper itself is okay. But the BPF_SEQ_PRINTF above is wrong.
Is this interpretation correct?
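
Concretely (an illustrative byte layout for a little-endian 32-bit kernel):

	byte offset:   0        4        8        12
	kernel wrote:  addr[0]  addr[1]  addr[2]  addr[3]

	/* The BPF program indexes 'entries' as u64, i.e. offsets 0, 8,
	 * 16, ..., so entries[0] comes back as addr[1]:addr[0] fused into
	 * one bogus value and every other address is skipped. */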

> 
> Thanks,
> Song
> 


* Re: [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace()
  2020-06-23 18:45   ` Andrii Nakryiko
@ 2020-06-23 22:53     ` Song Liu
  0 siblings, 0 replies; 19+ messages in thread
From: Song Liu @ 2020-06-23 22:53 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, john fastabend, KP Singh



> On Jun 23, 2020, at 11:45 AM, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> 
> On Tue, Jun 23, 2020 at 12:08 AM Song Liu <songliubraving@fb.com> wrote:
>> 
>> This helper can be used with bpf_iter__task to dump all /proc/*/stack to
>> a seq_file.
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> include/uapi/linux/bpf.h       | 10 +++++++++-
>> kernel/trace/bpf_trace.c       | 21 +++++++++++++++++++++
>> scripts/bpf_helpers_doc.py     |  2 ++
>> tools/include/uapi/linux/bpf.h | 10 +++++++++-
>> 4 files changed, 41 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 19684813faaed..a30416b822fe3 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3252,6 +3252,13 @@ union bpf_attr {
>>  *             case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
>>  *             is returned or the error code -EACCES in case the skb is not
>>  *             subject to CHECKSUM_UNNECESSARY.
>> + *
>> + * int bpf_get_task_stack_trace(struct task_struct *task, void *entries, u32 size)
>> + *     Description
>> + *             Save a task stack trace into array *entries*. This is a wrapper
>> + *             over stack_trace_save_tsk().
>> + *     Return
>> + *             Number of trace entries stored.
>>  */
>> #define __BPF_FUNC_MAPPER(FN)          \
>>        FN(unspec),                     \
>> @@ -3389,7 +3396,8 @@ union bpf_attr {
>>        FN(ringbuf_submit),             \
>>        FN(ringbuf_discard),            \
>>        FN(ringbuf_query),              \
>> -       FN(csum_level),
>> +       FN(csum_level),                 \
>> +       FN(get_task_stack_trace),
> 
> We have get_stackid and get_stack, I think to stay consistent it
> should be named get_task_stack
> 
>> 
>> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>  * function eBPF program intends to call
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index e729c9e587a07..2c13bcb5c2bce 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1488,6 +1488,23 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
>>        .arg4_type      = ARG_ANYTHING,
>> };
>> 
>> +BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
>> +          void *, entries, u32, size)
> 
> See get_stack definition, this one needs to support flags as well. And
> we should probably support BPF_F_USER_BUILD_ID as well. And
> BPF_F_USER_STACK is also a good idea, I presume?

That would be a different direction, similar to the stackmap implementation.
The current version follows the implementation behind /proc/<pid>/stack. Let
me check which of them is better.

Thanks,
Song



* Re: [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-23 22:27       ` Yonghong Song
@ 2020-06-24 20:37         ` Song Liu
  2020-06-25  5:29           ` Yonghong Song
  0 siblings, 1 reply; 19+ messages in thread
From: Song Liu @ 2020-06-24 20:37 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, netdev, ast, daniel, Kernel Team, john.fastabend, kpsingh



> On Jun 23, 2020, at 3:27 PM, Yonghong Song <yhs@fb.com> wrote:
> 
> 
> 
> On 6/23/20 3:07 PM, Song Liu wrote:
>>> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote:
>>> 
>>> 
>>> 
>>> On 6/23/20 12:08 AM, Song Liu wrote:
>>>> The new test is similar to other bpf_iter tests.
>>>> Signed-off-by: Song Liu <songliubraving@fb.com>
>>>> ---
>>>>  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++++
>>>>  .../selftests/bpf/progs/bpf_iter_task_stack.c | 50 +++++++++++++++++++
>>>>  2 files changed, 67 insertions(+)
>>>>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>>> index 87c29dde1cf96..baa83328f810d 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>>> @@ -5,6 +5,7 @@
>>>>  #include "bpf_iter_netlink.skel.h"
>>>>  #include "bpf_iter_bpf_map.skel.h"
>>>>  #include "bpf_iter_task.skel.h"
>>>> +#include "bpf_iter_task_stack.skel.h"
>>>>  #include "bpf_iter_task_file.skel.h"
>>>>  #include "bpf_iter_test_kern1.skel.h"
>>>>  #include "bpf_iter_test_kern2.skel.h"
>>>> @@ -106,6 +107,20 @@ static void test_task(void)
>>>>  	bpf_iter_task__destroy(skel);
>>>>  }
>>>>  +static void test_task_stack(void)
>>>> +{
>>>> +	struct bpf_iter_task_stack *skel;
>>>> +
>>>> +	skel = bpf_iter_task_stack__open_and_load();
>>>> +	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
>>>> +		  "skeleton open_and_load failed\n"))
>>>> +		return;
>>>> +
>>>> +	do_dummy_read(skel->progs.dump_task_stack);
>>>> +
>>>> +	bpf_iter_task_stack__destroy(skel);
>>>> +}
>>>> +
>>>>  static void test_task_file(void)
>>>>  {
>>>>  	struct bpf_iter_task_file *skel;
>>>> @@ -392,6 +407,8 @@ void test_bpf_iter(void)
>>>>  		test_bpf_map();
>>>>  	if (test__start_subtest("task"))
>>>>  		test_task();
>>>> +	if (test__start_subtest("task_stack"))
>>>> +		test_task_stack();
>>>>  	if (test__start_subtest("task_file"))
>>>>  		test_task_file();
>>>>  	if (test__start_subtest("anon"))
>>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>>> new file mode 100644
>>>> index 0000000000000..4fc939e0fca77
>>>> --- /dev/null
>>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
>>>> @@ -0,0 +1,50 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/* Copyright (c) 2020 Facebook */
>>>> +/* "undefine" structs in vmlinux.h, because we "override" them below */
>>>> +#define bpf_iter_meta bpf_iter_meta___not_used
>>>> +#define bpf_iter__task bpf_iter__task___not_used
>>>> +#include "vmlinux.h"
>>>> +#undef bpf_iter_meta
>>>> +#undef bpf_iter__task
>>>> +#include <bpf/bpf_helpers.h>
>>>> +#include <bpf/bpf_tracing.h>
>>>> +
>>>> +char _license[] SEC("license") = "GPL";
>>>> +
>>>> +struct bpf_iter_meta {
>>>> +	struct seq_file *seq;
>>>> +	__u64 session_id;
>>>> +	__u64 seq_num;
>>>> +} __attribute__((preserve_access_index));
>>>> +
>>>> +struct bpf_iter__task {
>>>> +	struct bpf_iter_meta *meta;
>>>> +	struct task_struct *task;
>>>> +} __attribute__((preserve_access_index));
>>>> +
>>>> +#define MAX_STACK_TRACE_DEPTH   64
>>>> +unsigned long entries[MAX_STACK_TRACE_DEPTH];
>>>> +
>>>> +SEC("iter/task")
>>>> +int dump_task_stack(struct bpf_iter__task *ctx)
>>>> +{
>>>> +	struct seq_file *seq = ctx->meta->seq;
>>>> +	struct task_struct *task = ctx->task;
>>>> +	unsigned int i, num_entries;
>>>> +
>>>> +	if (task == (void *)0)
>>>> +		return 0;
>>>> +
>>>> +	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
>>>> +
>>>> +	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
>>>> +
>>>> +	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
>>>> +		if (num_entries > i)
>>>> +			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
>>> 
>>> We may have an issue on 32-bit systems.
>>> On a 32-bit system, the following is called in the kernel:
>>> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>>> It will pack addresses at 4-byte increments, but the BPF program
>>> reads them at 8-byte increments.
>> Can we avoid potential issues by requiring size % 8 == 0? Or maybe
>> round size down to the nearest multiple of 8?
> 
> This is what I mean:
>  - In the BPF program, "long" means u64, so we allocate a 64 * 8 byte
>    buffer and pass it to the helper.
>  - In the helper, the store address advances by sizeof(long), which is
>    4 on a 32-bit system, so addresses are recorded at buf, buf + 4,
>    buf + 8, buf + 12, ...
>  - After the helper returns, the BPF program tries to retrieve the
>    addresses at buf, buf + 8, buf + 16, ...
> 
> The helper itself is okay, but the BPF_SEQ_PRINTF above is wrong.
> Is this interpretation correct?

Thanks for the clarification. I guess the best solution is to fix this 
once in the kernel, so BPF programs don't have to worry about it. 
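
Something like this in the helper, perhaps (an untested sketch; the
on-stack bounce buffer and its 64-entry bound are illustrative only):

BPF_CALL_3(bpf_get_task_stack_trace, struct task_struct *, task,
	   void *, entries, u32, size)
{
	/* Bounce buffer in the kernel's native long size. */
	unsigned long buf[64];
	u64 *out = entries;
	unsigned int i, nr;

	if (size > ARRAY_SIZE(buf))
		size = ARRAY_SIZE(buf);

	nr = stack_trace_save_tsk(task, buf, size, 0);

	/* Widen each address to u64 so BPF programs can always read
	 * the output at 8-byte increments, including on 32-bit
	 * kernels where sizeof(long) == 4.
	 */
	for (i = 0; i < nr; i++)
		out[i] = buf[i];

	return nr;
}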

Song


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace()
  2020-06-24 20:37         ` Song Liu
@ 2020-06-25  5:29           ` Yonghong Song
  0 siblings, 0 replies; 19+ messages in thread
From: Yonghong Song @ 2020-06-25  5:29 UTC (permalink / raw)
  To: Song Liu; +Cc: bpf, netdev, ast, daniel, Kernel Team, john.fastabend, kpsingh



On 6/24/20 1:37 PM, Song Liu wrote:
> 
> 
>> On Jun 23, 2020, at 3:27 PM, Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 6/23/20 3:07 PM, Song Liu wrote:
>>>> On Jun 23, 2020, at 11:57 AM, Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>>
>>>>
>>>> On 6/23/20 12:08 AM, Song Liu wrote:
>>>>> The new test is similar to other bpf_iter tests.
>>>>> Signed-off-by: Song Liu <songliubraving@fb.com>
>>>>> ---
>>>>> [...]
>>>>> +#define MAX_STACK_TRACE_DEPTH   64
>>>>> +unsigned long entries[MAX_STACK_TRACE_DEPTH];
>>>>> [...]
>>>>> +	num_entries = bpf_get_task_stack_trace(task, entries, MAX_STACK_TRACE_DEPTH);
>>>>> +
>>>>> +	BPF_SEQ_PRINTF(seq, "pid: %8u\n", task->pid);
>>>>> +
>>>>> +	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
>>>>> +		if (num_entries > i)
>>>>> +			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
>>>>
>>>> We may have an issue on 32-bit systems.
>>>> On a 32-bit system, the following is called in the kernel:
>>>> +	return stack_trace_save_tsk(task, (unsigned long *)entries, size, 0);
>>>> It will pack addresses at 4-byte increments, but the BPF program
>>>> reads them at 8-byte increments.
>>> Can we avoid potential issues by requiring size % 8 == 0? Or maybe
>>> round size down to the nearest multiple of 8?
>>
>> This is what I mean:
>>  - In the BPF program, "long" means u64, so we allocate a 64 * 8 byte
>>    buffer and pass it to the helper.
>>  - In the helper, the store address advances by sizeof(long), which is
>>    4 on a 32-bit system, so addresses are recorded at buf, buf + 4,
>>    buf + 8, buf + 12, ...
>>  - After the helper returns, the BPF program tries to retrieve the
>>    addresses at buf, buf + 8, buf + 16, ...
>>
>> The helper itself is okay, but the BPF_SEQ_PRINTF above is wrong.
>> Is this interpretation correct?
> 
> Thanks for the clarification. I guess the best solution is to fix this
> once in the kernel, so BPF programs don't have to worry about it.

The kernel could make each entry 8 bytes. That allows fewer entries on
32-bit, which is probably fine. Another option is for the BPF program
to declare an extern variable CONFIG_64BIT: if it is 'y', the system
is 64-bit; otherwise it is 32-bit. libbpf should set CONFIG_64BIT
correctly.

I guess storing each address as 64 bits is probably the better and
less confusing choice.
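
For reference, the extern approach on the BPF side would look roughly
like this (a sketch; it relies on libbpf's __kconfig extern handling,
with __weak so the program still loads when the key is absent):

extern bool CONFIG_64BIT __kconfig __weak;

/* Read entry i of the helper output at the kernel's native long
 * size: 8-byte slots on 64-bit, 4-byte slots on 32-bit.
 */
static __always_inline __u64 stack_entry(const char *buf, unsigned int i)
{
	if (CONFIG_64BIT)
		return *(const __u64 *)(buf + i * 8);
	return *(const __u32 *)(buf + i * 4);
}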

> 
> Song
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-06-25  5:29 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23  7:07 [PATCH bpf-next 0/3] bpf: introduce bpf_get_task_stack_trace() Song Liu
2020-06-23  7:08 ` [PATCH bpf-next 1/3] bpf: introduce helper bpf_get_task_stack_trace() Song Liu
2020-06-23 15:19   ` Alexei Starovoitov
2020-06-23 16:59     ` Song Liu
2020-06-23 17:40       ` Song Liu
2020-06-23 18:41       ` Andrii Nakryiko
2020-06-23 15:22   ` Daniel Borkmann
2020-06-23 17:19     ` Song Liu
2020-06-23 18:45   ` Andrii Nakryiko
2020-06-23 22:53     ` Song Liu
2020-06-23  7:08 ` [PATCH bpf-next 2/3] bpf: allow %pB in bpf_seq_printf() Song Liu
2020-06-23 15:29   ` Daniel Borkmann
2020-06-23 17:19     ` Song Liu
2020-06-23  7:08 ` [PATCH bpf-next 3/3] selftests/bpf: add bpf_iter test with bpf_get_task_stack_trace() Song Liu
2020-06-23 18:57   ` Yonghong Song
2020-06-23 22:07     ` Song Liu
2020-06-23 22:27       ` Yonghong Song
2020-06-24 20:37         ` Song Liu
2020-06-25  5:29           ` Yonghong Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).