bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs
@ 2022-01-13 23:31 Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
                   ` (8 more replies)
  0 siblings, 9 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-13 23:31 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This patch series makes the following changes:
* Adds a new bpf helper `bpf_access_process_vm` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task iterator
programs and this new bpf helper to read C++ async stack traces of a running
process for debugging C++ binaries in production.

Kenny Yu (3):
  bpf: Add bpf_access_process_vm() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 10 +++++
 kernel/bpf/helpers.c                          | 19 ++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 10 +++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 ++++++++++++++++++++
 8 files changed, 113 insertions(+)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
@ 2022-01-13 23:31 ` Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-13 23:31 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a helper for bpf programs to access the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 10 ++++++++++
 kernel/bpf/helpers.c           | 19 +++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 5 files changed, 42 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..d3d0ef8df148 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2222,6 +2222,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_access_process_vm_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..4b47ec8ae569 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5018,6 +5018,15 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ *	Description
+ *		Read *len* bytes from user space address *addr* in *tsk*'s
+ *		address space, and store the data in *buf*. This is a wrapper
+ *		of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5215,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..dd588912e197 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,24 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_access_process_vm, struct task_struct *, tsk,
+	   unsigned long, addr, void *, buf, int, len, unsigned int, gup_flags)
+{
+	return access_process_vm(tsk, addr, buf, len, gup_flags);
+}
+
+const struct bpf_func_proto bpf_access_process_vm_proto = {
+	.func		= bpf_access_process_vm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..1a6a81ce2e36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_access_process_vm:
+		return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b0383d371b9a..4b47ec8ae569 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5018,6 +5018,15 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ *	Description
+ *		Read *len* bytes from user space address *addr* in *tsk*'s
+ *		address space, and store the data in *buf*. This is a wrapper
+ *		of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5215,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-13 23:31 ` Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-13 23:31 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..9ec40835bce8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8599,6 +8599,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
  2022-01-13 23:31 ` [PATCH bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-13 23:31 ` Kenny Yu
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-13 23:31 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..fcda0ecd8746 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1266,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..bb4b63043533 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+// New helper added
+static long (*bpf_access_process_vm)(
+	struct task_struct *tsk,
+	unsigned long addr,
+	void *buf,
+	int len,
+	unsigned int gup_flags) = (void *)186;
+
+// Copied from include/linux/mm.h
+#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int numread;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	// Try to read the contents of the task's instruction pointer from the
+	// remote task's address space.
+	numread = bpf_access_process_vm(task,
+					(unsigned long)ptr,
+					(void *)&user_data,
+					sizeof(uint32_t),
+					FOLL_REMOTE);
+	if (numread != sizeof(uint32_t)) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      pid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (2 preceding siblings ...)
  2022-01-13 23:31 ` [PATCH bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
@ 2022-01-14  0:48 ` Kenny Yu
  2022-01-14  0:48   ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper Kenny Yu
                     ` (3 more replies)
  2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
                   ` (4 subsequent siblings)
  8 siblings, 4 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-14  0:48 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This patch series makes the following changes:
* Adds a new bpf helper `bpf_access_process_vm` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task iterator
programs and this new bpf helper to read C++ async stack traces of a running
process for debugging C++ binaries in production.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (4):
  bpf: Add bpf_access_process_vm() helper
  bpf: Add support for sleepable programs in bpf_iter_run_prog
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 10 ++++
 kernel/bpf/bpf_iter.c                         | 16 ++++--
 kernel/bpf/helpers.c                          | 19 +++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 10 ++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 9 files changed, 126 insertions(+), 3 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-14  0:48   ` Kenny Yu
  2022-01-14  0:48   ` [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog Kenny Yu
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-14  0:48 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a helper for bpf programs to access the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 10 ++++++++++
 kernel/bpf/helpers.c           | 19 +++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 5 files changed, 42 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..d3d0ef8df148 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2222,6 +2222,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_access_process_vm_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..4b47ec8ae569 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5018,6 +5018,15 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ *	Description
+ *		Read *len* bytes from user space address *addr* in *tsk*'s
+ *		address space, and store the data in *buf*. This is a wrapper
+ *		of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5215,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..dd588912e197 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,24 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_access_process_vm, struct task_struct *, tsk,
+	   unsigned long, addr, void *, buf, int, len, unsigned int, gup_flags)
+{
+	return access_process_vm(tsk, addr, buf, len, gup_flags);
+}
+
+const struct bpf_func_proto bpf_access_process_vm_proto = {
+	.func		= bpf_access_process_vm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..1a6a81ce2e36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_access_process_vm:
+		return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b0383d371b9a..4b47ec8ae569 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5018,6 +5018,15 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags)
+ *	Description
+ *		Read *len* bytes from user space address *addr* in *tsk*'s
+ *		address space, and store the data in *buf*. This is a wrapper
+ *		of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5215,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-14  0:48   ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-14  0:48   ` Kenny Yu
  2022-01-14  2:50     ` Alexei Starovoitov
  2022-01-14  0:48   ` [PATCH v2 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
  2022-01-14  0:49   ` [PATCH v2 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
  3 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-14  0:48 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

The next patch adds the ability to create sleepable bpf iterator programs.
This changes `bpf_iter_run_prog` to use the appropriate synchronization for
sleepable bpf programs. With sleepable bpf iterator programs, we can no
longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 kernel/bpf/bpf_iter.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..d814ca6454af 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+	}
 	ret = bpf_prog_run(prog, ctx);
 	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable)
+		rcu_read_unlock_trace();
+	else
+		rcu_read_unlock();
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v2 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-14  0:48   ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper Kenny Yu
  2022-01-14  0:48   ` [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog Kenny Yu
@ 2022-01-14  0:48   ` Kenny Yu
  2022-01-14  0:49   ` [PATCH v2 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
  3 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-14  0:48 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..9ec40835bce8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8599,6 +8599,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
                     ` (2 preceding siblings ...)
  2022-01-14  0:48   ` [PATCH v2 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-14  0:49   ` Kenny Yu
  2022-01-14  2:39     ` Alexei Starovoitov
  3 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-14  0:49 UTC (permalink / raw)
  To: bpf, ast, andrii, daniel, yhs; +Cc: Kenny Yu

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..fcda0ecd8746 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1266,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..bb4b63043533 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+// New helper added
+static long (*bpf_access_process_vm)(
+	struct task_struct *tsk,
+	unsigned long addr,
+	void *buf,
+	int len,
+	unsigned int gup_flags) = (void *)186;
+
+// Copied from include/linux/mm.h
+#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int numread;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	// Try to read the contents of the task's instruction pointer from the
+	// remote task's address space.
+	numread = bpf_access_process_vm(task,
+					(unsigned long)ptr,
+					(void *)&user_data,
+					sizeof(uint32_t),
+					FOLL_REMOTE);
+	if (numread != sizeof(uint32_t)) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      pid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-14  0:49   ` [PATCH v2 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
@ 2022-01-14  2:39     ` Alexei Starovoitov
  2022-01-14 23:14       ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-14  2:39 UTC (permalink / raw)
  To: Kenny Yu
  Cc: bpf, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Yonghong Song

On Thu, Jan 13, 2022 at 4:49 PM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a test for bpf iterator programs to make use of sleepable
> bpf helpers.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++++
>  .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
>  2 files changed, 70 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index b84f859b1267..fcda0ecd8746 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -138,6 +138,20 @@ static void test_task(void)
>         bpf_iter_task__destroy(skel);
>  }
>
> +static void test_task_sleepable(void)
> +{
> +       struct bpf_iter_task *skel;
> +
> +       skel = bpf_iter_task__open_and_load();
> +       if (CHECK(!skel, "bpf_iter_task__open_and_load",
> +                 "skeleton open_and_load failed\n"))
> +               return;
> +
> +       do_dummy_read(skel->progs.dump_task_sleepable);
> +
> +       bpf_iter_task__destroy(skel);
> +}
> +
>  static void test_task_stack(void)
>  {
>         struct bpf_iter_task_stack *skel;
> @@ -1252,6 +1266,8 @@ void test_bpf_iter(void)
>                 test_bpf_map();
>         if (test__start_subtest("task"))
>                 test_task();
> +       if (test__start_subtest("task_sleepable"))
> +               test_task_sleepable();
>         if (test__start_subtest("task_stack"))
>                 test_task_stack();
>         if (test__start_subtest("task_file"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> index c86b93f33b32..bb4b63043533 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> @@ -2,6 +2,7 @@
>  /* Copyright (c) 2020 Facebook */
>  #include "bpf_iter.h"
>  #include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
>
>  char _license[] SEC("license") = "GPL";
>
> @@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
>         BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
>         return 0;
>  }
> +
> +// New helper added
> +static long (*bpf_access_process_vm)(
> +       struct task_struct *tsk,
> +       unsigned long addr,
> +       void *buf,
> +       int len,
> +       unsigned int gup_flags) = (void *)186;

This shouldn't be needed.
Since patch 1 updates tools/include/uapi/linux/bpf.h
it will be in bpf_helper_defs.h automatically.

> +
> +// Copied from include/linux/mm.h
> +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */

Please use C style comments only.

> +       numread = bpf_access_process_vm(task,
> +                                       (unsigned long)ptr,
> +                                       (void *)&user_data,
> +                                       sizeof(uint32_t),
> +                                       FOLL_REMOTE);

We probably would need to hide flags like FOLL_REMOTE
inside the helper otherwise prog might confuse the kernel.
In this case I'm not even sure that FOLL_REMOTE is needed.
I suspect gup_flags=0 in all cases will work fine.
We're not doing write here and not pining anything.
fast_gup is not necessary either.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog
  2022-01-14  0:48   ` [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog Kenny Yu
@ 2022-01-14  2:50     ` Alexei Starovoitov
  2022-01-14 23:15       ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-14  2:50 UTC (permalink / raw)
  To: Kenny Yu
  Cc: bpf, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Yonghong Song

On Thu, Jan 13, 2022 at 4:49 PM Kenny Yu <kennyyu@fb.com> wrote:
>
> The next patch adds the ability to create sleepable bpf iterator programs.
> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  kernel/bpf/bpf_iter.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index b7aef5b3416d..d814ca6454af 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -5,6 +5,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
> +#include <linux/rcupdate_trace.h>
>
>  struct bpf_iter_target_info {
>         struct list_head list;
> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  {
>         int ret;
>
> -       rcu_read_lock();
> -       migrate_disable();
> +       if (prog->aux->sleepable) {
> +               rcu_read_lock_trace();

Pretty cool that a single 'if' is all that is needed to enable
sleepable iterators.

Maybe combine under one 'if' ?
if (prog->aux->sleepable) {
  lock_trace
  migr_dis
  might_fault
  bpf_prog_run
  migr_en
  unlock_trace
} else {
  lock
  migr_dis
  bpf_prog_run
  migr_end
  unlock
}

Would it be easier to read?

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-14  2:39     ` Alexei Starovoitov
@ 2022-01-14 23:14       ` Kenny Yu
  2022-01-15  1:38         ` Andrii Nakryiko
  0 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-14 23:14 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: andrii, ast, bpf, daniel, kennyyu, yhs

Hi Alexei,

> > +// New helper added
> > +static long (*bpf_access_process_vm)(
> > +       struct task_struct *tsk,
> > +       unsigned long addr,
> > +       void *buf,
> > +       int len,
> > +       unsigned int gup_flags) = (void *)186;
>
> This shouldn't be needed.
> Since patch 1 updates tools/include/uapi/linux/bpf.h
> it will be in bpf_helper_defs.h automatically.

I will fix. This is my first time writing selftests, so I am not too familiar
with how these are built and run. For my understanding, are these tests
meant to be built and run after booting the new kernel?

> > +
> > +// Copied from include/linux/mm.h
> > +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
>
> Please use C style comments only.

I will fix.

> > +       numread = bpf_access_process_vm(task,
> > +                                       (unsigned long)ptr,
> > +                                       (void *)&user_data,
> > +                                       sizeof(uint32_t),
> > +                                       FOLL_REMOTE);
> 
> We probably would need to hide flags like FOLL_REMOTE
> inside the helper otherwise prog might confuse the kernel.
> In this case I'm not even sure that FOLL_REMOTE is needed.
> I suspect gup_flags=0 in all cases will work fine.
> We're not doing a write here and not pinning anything.
> fast_gup is not necessary either.

Thanks for the suggestion! I'll remove the flag argument from the helper
to simplify the API for bpf programs. This means that the helper will have
the following signature:

  bpf_access_process_vm(struct task_struct *tsk,
                        unsigned long addr,
                        void *buf,
                        int len);

Thanks for the feedback!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog
  2022-01-14  2:50     ` Alexei Starovoitov
@ 2022-01-14 23:15       ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-14 23:15 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: andrii, ast, bpf, daniel, kennyyu, yhs

Hi Alexei,

> Pretty cool that a single 'if' is all that is needed to enable
> sleepable iterators.
> 
> Maybe combine under one 'if' ?
> if (prog->aux->sleepable) {
>   lock_trace
>   migr_dis
>   might_fault
>   bpf_prog_run
>   migr_en
>   unlock_trace
> } else {
>   lock
>   migr_dis
>   bpf_prog_run
>   migr_end
>   unlock
> }
>
> Would it be easier to read?

Yes, I agree, that is more readable. I'll make the change. I'll also
follow Yonghong Song's suggestion (offline) of merging this patch into
the first patch to keep bisectability together.

Thanks for the feedback!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-14 23:14       ` Kenny Yu
@ 2022-01-15  1:38         ` Andrii Nakryiko
  2022-01-15  4:30           ` Kenny Yu
  2022-01-15 16:27           ` Gabriele
  0 siblings, 2 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-15  1:38 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Alexei Starovoitov, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Yonghong Song

On Fri, Jan 14, 2022 at 3:14 PM Kenny Yu <kennyyu@fb.com> wrote:
>
> Hi Alexei,
>
> > > +// New helper added
> > > +static long (*bpf_access_process_vm)(
> > > +       struct task_struct *tsk,
> > > +       unsigned long addr,
> > > +       void *buf,
> > > +       int len,
> > > +       unsigned int gup_flags) = (void *)186;
> >
> > This shouldn't be needed.
> > Since patch 1 updates tools/include/uapi/linux/bpf.h
> > it will be in bpf_helper_defs.h automatically.
>
> I will fix. This is my first time writing selftests, so I am not too familiar
> with how these are built and run. For my understanding, are these tests
> meant to be built and run after booting the new kernel?

Look at vmtest.sh under tools/testing/selftests/bpf, it handles
building kernel, selftests and spinning up qemu instance for running
selftests inside it.

>
> > > +
> > > +// Copied from include/linux/mm.h
> > > +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
> >
> > Please use C style comments only.
>
> I will fix.
>
> > > +       numread = bpf_access_process_vm(task,
> > > +                                       (unsigned long)ptr,
> > > +                                       (void *)&user_data,
> > > +                                       sizeof(uint32_t),
> > > +                                       FOLL_REMOTE);
> >
> > We probably would need to hide flags like FOLL_REMOTE
> > inside the helper otherwise prog might confuse the kernel.
> > In this case I'm not even sure that FOLL_REMOTE is needed.
> > I suspect gup_flags=0 in all cases will work fine.
> > We're not doing a write here and not pinning anything.
> > fast_gup is not necessary either.
>
> Thanks for the suggestion! I'll remove the flag argument from the helper
> to simplify the API for bpf programs. This means that the helper will have
> the following signature:
>
>   bpf_access_process_vm(struct task_struct *tsk,
>                         unsigned long addr,
>                         void *buf,
>                         int len);

keeping generic u64 flags makes sense for the future, so I'd keep it.

But I also wanted to point out that this helper is logically in the
same family as bpf_probe_read_kernel/user and bpf_copy_from_user, etc,
where we have consistent pattern that first two arguments specify
destination buffer (so buf + len) and the remaining ones specify
source (in probe_read it's just an address, here it's tsk_addr). So I
wonder if it would be less surprising and more consistent to reorder
and have:

buf, len, tsk, addr, flags

?

>
> Thanks for the feedback!
>
> Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-15  1:38         ` Andrii Nakryiko
@ 2022-01-15  4:30           ` Kenny Yu
  2022-01-15 16:27           ` Gabriele
  1 sibling, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-15  4:30 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: alexei.starovoitov, andrii, ast, bpf, daniel, kennyyu, yhs

Hi Andrii,

> Look at vmtest.sh under tools/testing/selftests/bpf, it handles
> building kernel, selftests and spinning up qemu instance for running
> selftests inside it.

Thanks, this helps!

> keeping generic u64 flags makes sense for the future, so I'd keep it.

That makes sense, I'll keep the flags in that case.

> But I also wanted to point out that this helper is logically in the
> same family as bpf_probe_read_kernel/user and bpf_copy_from_user, etc,
> where we have consistent pattern that first two arguments specify
> destination buffer (so buf + len) and the remaining ones specify
> source (in probe_read it's just an address, here it's tsk_addr). So I
> wonder if it would be less surprising and more consistent to reorder
> and have:
> 
> buf, len, tsk, addr, flags
>
> ?

Yeah, that looks better for consistency. Should I still keep the name
as `bpf_access_process_vm`, or call it something else to be more consistent
with the naming of the other bpf helpers? The benefit of the
`bpf_access_process_vm` name is that it makes it obvious it is wrapping
an existing function `access_process_vm`. 

Thanks for the feedback!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-15  1:38         ` Andrii Nakryiko
  2022-01-15  4:30           ` Kenny Yu
@ 2022-01-15 16:27           ` Gabriele
  2022-01-19 16:56             ` Kenny Yu
  1 sibling, 1 reply; 62+ messages in thread
From: Gabriele @ 2022-01-15 16:27 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Alexei Starovoitov, Alexei Starovoitov, bpf,
	Daniel Borkmann, Yonghong Song

On Sat, 15 Jan 2022 at 15:48, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Jan 14, 2022 at 3:14 PM Kenny Yu <kennyyu@fb.com> wrote:
> >
> > Hi Alexei,
> >
> > > > +// New helper added
> > > > +static long (*bpf_access_process_vm)(
> > > > +       struct task_struct *tsk,
> > > > +       unsigned long addr,
> > > > +       void *buf,
> > > > +       int len,
> > > > +       unsigned int gup_flags) = (void *)186;
> > >
> > > This shouldn't be needed.
> > > Since patch 1 updates tools/include/uapi/linux/bpf.h
> > > it will be in bpf_helper_defs.h automatically.
> >
> > I will fix. This is my first time writing selftests, so I am not too familiar
> > with how these are built and run. For my understanding, are these tests
> > meant to be built and run after booting the new kernel?
>
> Look at vmtest.sh under tools/testing/selftests/bpf, it handles
> building kernel, selftests and spinning up qemu instance for running
> selftests inside it.
>
> >
> > > > +
> > > > +// Copied from include/linux/mm.h
> > > > +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
> > >
> > > Please use C style comments only.
> >
> > I will fix.
> >
> > > > +       numread = bpf_access_process_vm(task,
> > > > +                                       (unsigned long)ptr,
> > > > +                                       (void *)&user_data,
> > > > +                                       sizeof(uint32_t),
> > > > +                                       FOLL_REMOTE);
> > >
> > > We probably would need to hide flags like FOLL_REMOTE
> > > inside the helper otherwise prog might confuse the kernel.
> > > In this case I'm not even sure that FOLL_REMOTE is needed.
> > > I suspect gup_flags=0 in all cases will work fine.
> > > We're not doing write here and not pining anything.
> > > fast_gup is not necessary either.
> >
> > Thanks for the suggestion! I'll remove the flag argument from the helper
> > to simplify the API for bpf programs. This means that the helper will have
> > the following signature:
> >
> >   bpf_access_process_vm(struct task_struct *tsk,
> >                         unsigned long addr,
> >                         void *buf,
> >                         int len);
>
> keeping generic u64 flags makes sense for the future, so I'd keep it.
>
> But I also wanted to point out that this helper is logically in the
> same family as bpf_probe_read_kernel/user and bpf_copy_from_user, etc,
> where we have consistent pattern that first two arguments specify
> destination buffer (so buf + len) and the remaining ones specify
> source (in probe_read it's just an address, here it's tsk_addr). So I
> wonder if it would be less surprising and more consistent to reorder
> and have:
>
> buf, len, tsk, addr, flags
>
> ?
>

I would personally find it more intuitive to have process information
passed as either the first argument (like process_vm_readv does), or
as "last", just before the flags (as extra information required w.r.t.
to local versions, e.g. bpf_copy_from_user).

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v2 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-15 16:27           ` Gabriele
@ 2022-01-19 16:56             ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 16:56 UTC (permalink / raw)
  To: phoenix1987; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, kennyyu, yhs

> > But I also wanted to point out that this helper is logically in the
> > same family as bpf_probe_read_kernel/user and bpf_copy_from_user, etc,
> > where we have consistent pattern that first two arguments specify
> > destination buffer (so buf + len) and the remaining ones specify
> > source (in probe_read it's just an address, here it's tsk_addr). So I
> > wonder if it would be less surprising and more consistent to reorder
> > and have:
> >
> > buf, len, tsk, addr, flags
> >
> > ?
> >
>
> I would personally find it more intuitive to have process information
> passed as either the first argument (like process_vm_readv does), or
> as "last", just before the flags (as extra information required w.r.t.
> to local versions, e.g. bpf_copy_from_user).

I think that makes sense. I'll combine both Andrii's and Gabriele's suggestions
and keep the signature as close to the existing helpers
(e.g., bpf_probe_read_user) and add the additional arguments at the end.
I'll proceed with this signature:

  bpf_access_process_vm(void *dst,
  			u32 size,
			const void *unsafe_ptr,
			struct task_struct *tsk,
			unsigned int flags);

Thanks for all the suggestions!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (3 preceding siblings ...)
  2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-19 18:02 ` Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
                     ` (2 more replies)
  2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
                   ` (3 subsequent siblings)
  8 siblings, 3 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 18:02 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov, phoenix1987

This patch series makes the following changes:
* Adds a new bpf helper `bpf_access_process_vm` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (3):
  bpf: Add bpf_access_process_vm() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 10 +++++
 kernel/bpf/bpf_iter.c                         | 20 ++++++---
 kernel/bpf/helpers.c                          | 19 ++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 10 +++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 43 +++++++++++++++++++
 9 files changed, 117 insertions(+), 5 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-19 18:02   ` Kenny Yu
  2022-01-19 20:16     ` Alexei Starovoitov
  2022-01-19 18:02   ` [PATCH v3 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 18:02 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov, phoenix1987

This adds a helper for bpf programs to access the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

This changes `bpf_iter_run_prog` to use the appropriate synchronization for
sleepable bpf programs. With sleepable bpf iterator programs, we can no
longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
 kernel/bpf/helpers.c           | 21 +++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 6 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..d3d0ef8df148 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2222,6 +2222,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_access_process_vm_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b0383d371b9a..27c2a18d5941 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5018,6 +5018,16 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)
+ *	Description
+ *		Read *size* bytes from user space address *unsafe_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5216,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
-	ret = bpf_prog_run(prog, ctx);
-	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock_trace();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock();
+	}
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..30f1067ca8dc 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,26 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
+	   const void __user *, user_ptr, struct task_struct *, tsk,
+	   u32, flags)
+{
+	/* flags is not used yet */
+	return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+}
+
+const struct bpf_func_proto bpf_access_process_vm_proto = {
+	.func		= bpf_access_process_vm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_BTF_ID,
+	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg5_type	= ARG_ANYTHING
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..1a6a81ce2e36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_access_process_vm:
+		return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b0383d371b9a..27c2a18d5941 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5018,6 +5018,16 @@ union bpf_attr {
  *
  *	Return
  *		The number of arguments of the traced function.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)
+ *	Description
+ *		Read *size* bytes from user space address *unsafe_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5206,6 +5216,7 @@ union bpf_attr {
 	FN(get_func_arg),		\
 	FN(get_func_ret),		\
 	FN(get_func_arg_cnt),		\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v3 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-19 18:02   ` Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 18:02 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov, phoenix1987

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..9ec40835bce8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8599,6 +8599,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v3 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
  2022-01-19 18:02   ` [PATCH v3 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-19 18:02   ` Kenny Yu
  2 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 18:02 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov, phoenix1987

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 44 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..fcda0ecd8746 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1266,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..81a86b3b7086 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,46 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int numread;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	numread = bpf_access_process_vm(&user_data,
+					sizeof(uint32_t),
+					ptr,
+					task,
+					0);
+	if (numread != sizeof(uint32_t)) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      pid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-19 20:16     ` Alexei Starovoitov
  2022-01-19 22:51       ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-19 20:16 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Gabriele

On Wed, Jan 19, 2022 at 10:03 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> +BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
> +          const void __user *, user_ptr, struct task_struct *, tsk,
> +          u32, flags)
> +{
> +       /* flags is not used yet */
> +       return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> +}

Though the 'flags' argument is unused the helper has to check
that it's zero and return -EINVAL otherwise.
If we don't do this we won't be able to change the behavior later
due to backward compatibility.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-19 20:16     ` Alexei Starovoitov
@ 2022-01-19 22:51       ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 22:51 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: andrii, ast, bpf, daniel, kennyyu, phoenix1987, yhs

> > +BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
> > +          const void __user *, user_ptr, struct task_struct *, tsk,
> > +          u32, flags)
> > +{
> > +       /* flags is not used yet */
> > +       return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> > +}
>
> Though the 'flags' argument is unused the helper has to check
> that it's zero and return -EINVAL otherwise.
> If we don't do this we won't be able to change the behavior later
> due to backward compatibility.

Thanks for the feedback! Will fix.

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (4 preceding siblings ...)
  2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-19 22:59 ` Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
                     ` (2 more replies)
  2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
                   ` (2 subsequent siblings)
  8 siblings, 3 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 22:59 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, phoenix1987, alexei.starovoitov

This patch series makes the following changes:
* Adds a new bpf helper `bpf_access_process_vm` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (3):
  bpf: Add bpf_access_process_vm() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 11 +++++
 kernel/bpf/bpf_iter.c                         | 20 ++++++---
 kernel/bpf/helpers.c                          | 23 ++++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 11 +++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 44 +++++++++++++++++++
 9 files changed, 124 insertions(+), 5 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-19 22:59   ` Kenny Yu
  2022-01-20  3:45     ` Yonghong Song
  2022-01-19 22:59   ` [PATCH v4 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 22:59 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, phoenix1987, alexei.starovoitov

This adds a helper for bpf programs to access the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

This changes `bpf_iter_run_prog` to use the appropriate synchronization for
sleepable bpf programs. With sleepable bpf iterator programs, we can no
longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
 kernel/bpf/helpers.c           | 23 +++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 6 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dce54eb0aae8..29f174c08126 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2220,6 +2220,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_access_process_vm_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fe2272defcd9..38a85e6756b2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5049,6 +5049,16 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)
+ *	Description
+ *		Read *size* bytes from user space address *unsafe_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5249,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
-	ret = bpf_prog_run(prog, ctx);
-	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock_trace();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock();
+	}
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..6cd76280fe66 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,28 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
+	   const void __user *, user_ptr, struct task_struct *, tsk,
+	   u32, flags)
+{
+	/* flags is not used yet */
+	if (flags != 0)
+		return -EINVAL;
+	return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+}
+
+const struct bpf_func_proto bpf_access_process_vm_proto = {
+	.func		= bpf_access_process_vm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_BTF_ID,
+	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg5_type	= ARG_ANYTHING
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..1a6a81ce2e36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_access_process_vm:
+		return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fe2272defcd9..38a85e6756b2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5049,6 +5049,16 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)
+ *	Description
+ *		Read *size* bytes from user space address *unsafe_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5249,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v4 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-19 22:59   ` Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 22:59 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, phoenix1987, alexei.starovoitov

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..9ec40835bce8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8599,6 +8599,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v4 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
  2022-01-19 22:59   ` [PATCH v4 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-19 22:59   ` Kenny Yu
  2 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-19 22:59 UTC (permalink / raw)
  To: kennyyu; +Cc: andrii, ast, bpf, daniel, yhs, phoenix1987, alexei.starovoitov

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 44 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..fcda0ecd8746 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1266,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..81a86b3b7086 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,46 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int numread;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	numread = bpf_access_process_vm(&user_data,
+					sizeof(uint32_t),
+					ptr,
+					task,
+					0);
+	if (numread != sizeof(uint32_t)) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      pid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-20  3:45     ` Yonghong Song
  2022-01-20 17:11       ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Yonghong Song @ 2022-01-20  3:45 UTC (permalink / raw)
  To: Kenny Yu; +Cc: andrii, ast, bpf, daniel, phoenix1987, alexei.starovoitov



On 1/19/22 2:59 PM, Kenny Yu wrote:
> This adds a helper for bpf programs to access the memory of other
> tasks. This also adds the ability for bpf iterator programs to
> be sleepable.
> 
> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
> 
> As an example use case at Meta, we are using a bpf task iterator program
> and this new helper to print C++ async stack traces for all threads of
> a given process.
> 
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>   include/linux/bpf.h            |  1 +
>   include/uapi/linux/bpf.h       | 11 +++++++++++
>   kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
>   kernel/bpf/helpers.c           | 23 +++++++++++++++++++++++
>   kernel/trace/bpf_trace.c       |  2 ++
>   tools/include/uapi/linux/bpf.h | 11 +++++++++++
>   6 files changed, 63 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index dce54eb0aae8..29f174c08126 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2220,6 +2220,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>   extern const struct bpf_func_proto bpf_find_vma_proto;
>   extern const struct bpf_func_proto bpf_loop_proto;
>   extern const struct bpf_func_proto bpf_strncmp_proto;
> +extern const struct bpf_func_proto bpf_access_process_vm_proto;
>   
>   const struct bpf_func_proto *tracing_prog_func_proto(
>     enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index fe2272defcd9..38a85e6756b2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,16 @@ union bpf_attr {
>    *		This helper is currently supported by cgroup programs only.
>    *	Return
>    *		0 on success, or a negative error in case of failure.
> + *
> + * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)

Maybe we can change 'flags' type to u64? This will leave more room for 
future potential extensions. In all recent helpers with added 'flags', 
most of them are u64.

> + *	Description
> + *		Read *size* bytes from user space address *unsafe_ptr* in *tsk*'s
> + *		address space, and stores the data in *dst*. *flags* is not
> + *		used yet and is provided for future extensibility. This is a
> + *		wrapper of **access_process_vm**\ ().
> + *	Return
> + *		The number of bytes written to the buffer, or a negative error
> + *		in case of failure.
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -5239,6 +5249,7 @@ union bpf_attr {
>   	FN(get_func_arg_cnt),		\
>   	FN(get_retval),			\
>   	FN(set_retval),			\
> +	FN(access_process_vm),		\
>   	/* */
>   
[...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-20  3:45     ` Yonghong Song
@ 2022-01-20 17:11       ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-20 17:11 UTC (permalink / raw)
  To: yhs; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, kennyyu, phoenix1987

> > + * long bpf_access_process_vm(void *dst, u32 size, const void *unsafe_ptr, struct task_struct *tsk, u32 flags)
> 
> Maybe we can change 'flags' type to u64? This will leave more room for 
> future potential extensions. In all recent helpers with added 'flags', 
> most of them are u64.

I'll change it to u64. Thanks for the suggestion!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (5 preceding siblings ...)
  2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-20 17:29 ` Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
                     ` (2 more replies)
  2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
  8 siblings, 3 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-20 17:29 UTC (permalink / raw)
  To: kennyyu; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, phoenix1987, yhs

This patch series makes the following changes:
* Adds a new bpf helper `bpf_access_process_vm` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v4:
* Make `flags` into u64.
* Use `user_ptr` arg name to be consistent with `bpf_copy_from_user`.
* Add an extra check in selftests to verify access_process_vm calls
  succeeded.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (3):
  bpf: Add bpf_access_process_vm() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 11 +++++
 kernel/bpf/bpf_iter.c                         | 20 ++++++---
 kernel/bpf/helpers.c                          | 23 ++++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 11 +++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 44 +++++++++++++++++++
 9 files changed, 124 insertions(+), 5 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-20 17:29   ` Kenny Yu
  2022-01-20 22:45     ` Andrii Nakryiko
  2022-01-20 17:29   ` [PATCH v5 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-20 17:29 UTC (permalink / raw)
  To: kennyyu; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, phoenix1987, yhs

This adds a helper for bpf programs to access the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

This changes `bpf_iter_run_prog` to use the appropriate synchronization for
sleepable bpf programs. With sleepable bpf iterator programs, we can no
longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
 kernel/bpf/helpers.c           | 23 +++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 6 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dce54eb0aae8..29f174c08126 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2220,6 +2220,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_access_process_vm_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fe2272defcd9..2ac56e2512df 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5049,6 +5049,16 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5249,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
-	ret = bpf_prog_run(prog, ctx);
-	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock_trace();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock();
+	}
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..9d7e86edc48e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,28 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
+	   const void __user *, user_ptr, struct task_struct *, tsk,
+	   u64, flags)
+{
+	/* flags is not used yet */
+	if (flags)
+		return -EINVAL;
+	return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+}
+
+const struct bpf_func_proto bpf_access_process_vm_proto = {
+	.func		= bpf_access_process_vm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_BTF_ID,
+	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg5_type	= ARG_ANYTHING
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..1a6a81ce2e36 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_access_process_vm:
+		return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fe2272defcd9..2ac56e2512df 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5049,6 +5049,16 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This is a
+ *		wrapper of **access_process_vm**\ ().
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5249,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(access_process_vm),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v5 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-20 17:29   ` Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-20 17:29 UTC (permalink / raw)
  To: kennyyu; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, phoenix1987, yhs

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..9ec40835bce8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8599,6 +8599,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v5 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
  2022-01-20 17:29   ` [PATCH v5 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-20 17:29   ` Kenny Yu
  2022-01-20 22:48     ` Andrii Nakryiko
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-20 17:29 UTC (permalink / raw)
  To: kennyyu; +Cc: alexei.starovoitov, andrii, ast, bpf, daniel, phoenix1987, yhs

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 19 ++++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 47 +++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..f6fb4f95513d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,23 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	ASSERT_GT(skel->bss->num_success_access_process_vm, 0,
+		  "num_success_access_process_vm");
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1269,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..3fa735af96f7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,49 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+int num_success_access_process_vm = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int numread;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	numread = bpf_access_process_vm(&user_data,
+					sizeof(uint32_t),
+					ptr,
+					task,
+					0);
+	if (numread != sizeof(uint32_t)) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	++num_success_access_process_vm;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
@ 2022-01-20 22:45     ` Andrii Nakryiko
  2022-01-21  2:27       ` Alexei Starovoitov
  0 siblings, 1 reply; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-20 22:45 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Alexei Starovoitov, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Gabriele, Yonghong Song

On Thu, Jan 20, 2022 at 9:30 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a helper for bpf programs to access the memory of other
> tasks. This also adds the ability for bpf iterator programs to
> be sleepable.
>
> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
>
> As an example use case at Meta, we are using a bpf task iterator program
> and this new helper to print C++ async stack traces for all threads of
> a given process.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       | 11 +++++++++++
>  kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
>  kernel/bpf/helpers.c           | 23 +++++++++++++++++++++++
>  kernel/trace/bpf_trace.c       |  2 ++
>  tools/include/uapi/linux/bpf.h | 11 +++++++++++
>  6 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index dce54eb0aae8..29f174c08126 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2220,6 +2220,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>  extern const struct bpf_func_proto bpf_find_vma_proto;
>  extern const struct bpf_func_proto bpf_loop_proto;
>  extern const struct bpf_func_proto bpf_strncmp_proto;
> +extern const struct bpf_func_proto bpf_access_process_vm_proto;
>
>  const struct bpf_func_proto *tracing_prog_func_proto(
>    enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index fe2272defcd9..2ac56e2512df 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,16 @@ union bpf_attr {
>   *             This helper is currently supported by cgroup programs only.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> + *     Description
> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> + *             address space, and stores the data in *dst*. *flags* is not
> + *             used yet and is provided for future extensibility. This is a
> + *             wrapper of **access_process_vm**\ ().
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5239,6 +5249,7 @@ union bpf_attr {
>         FN(get_func_arg_cnt),           \
>         FN(get_retval),                 \
>         FN(set_retval),                 \
> +       FN(access_process_vm),          \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index b7aef5b3416d..110029ede71e 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -5,6 +5,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
> +#include <linux/rcupdate_trace.h>
>
>  struct bpf_iter_target_info {
>         struct list_head list;
> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  {
>         int ret;
>
> -       rcu_read_lock();
> -       migrate_disable();
> -       ret = bpf_prog_run(prog, ctx);
> -       migrate_enable();
> -       rcu_read_unlock();
> +       if (prog->aux->sleepable) {
> +               rcu_read_lock_trace();
> +               migrate_disable();
> +               might_fault();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock_trace();
> +       } else {
> +               rcu_read_lock();
> +               migrate_disable();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock();
> +       }
>
>         /* bpf program can only return 0 or 1:
>          *  0 : okay
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 01cfdf40c838..9d7e86edc48e 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -16,6 +16,7 @@
>  #include <linux/pid_namespace.h>
>  #include <linux/proc_ns.h>
>  #include <linux/security.h>
> +#include <linux/btf_ids.h>
>
>  #include "../../lib/kstrtox.h"
>
> @@ -671,6 +672,28 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
>         .arg3_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
> +          const void __user *, user_ptr, struct task_struct *, tsk,
> +          u64, flags)
> +{
> +       /* flags is not used yet */
> +       if (flags)
> +               return -EINVAL;
> +       return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> +}
> +
> +const struct bpf_func_proto bpf_access_process_vm_proto = {
> +       .func           = bpf_access_process_vm,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
> +       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
> +       .arg3_type      = ARG_ANYTHING,
> +       .arg4_type      = ARG_PTR_TO_BTF_ID,
> +       .arg4_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
> +       .arg5_type      = ARG_ANYTHING
> +};
> +
>  BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
>  {
>         if (cpu >= nr_cpu_ids)
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 21aa30644219..1a6a81ce2e36 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_find_vma_proto;
>         case BPF_FUNC_trace_vprintk:
>                 return bpf_get_trace_vprintk_proto();
> +       case BPF_FUNC_access_process_vm:
> +               return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
>         default:
>                 return bpf_base_func_proto(func_id);
>         }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index fe2272defcd9..2ac56e2512df 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,16 @@ union bpf_attr {
>   *             This helper is currently supported by cgroup programs only.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> + *     Description
> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> + *             address space, and stores the data in *dst*. *flags* is not
> + *             used yet and is provided for future extensibility. This is a
> + *             wrapper of **access_process_vm**\ ().
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.

wait, can it read less than *size* and return success?

bpf_probe_read_kernel() returns:

0 on success, or a negative error in case of failure.

Let's be consistent. Returning the number of read bytes makes more
sense in cases when we don't know the amount of bytes to be actually
read ahead of time (e.g., when reading zero-terminated strings).

BTW, should we also add a C string reading helper as well, just like
there is bpf_probe_read_user_str() and bpf_probe_read_user()?

Another thing, I think it's important to mention that this helper can
be used only from sleepable BPF programs.

And not to start the bikeshedding session, but we have
bpf_copy_from_user(), wouldn't something like
bpf_copy_from_user_{vm,process,remote}() be more in line and less
surprising for BPF users. BTW, "access" implies writing just as much
as reading, so using "access" in the sense of "read" seems wrong and
confusing.


>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5239,6 +5249,7 @@ union bpf_attr {
>         FN(get_func_arg_cnt),           \
>         FN(get_retval),                 \
>         FN(set_retval),                 \
> +       FN(access_process_vm),          \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-20 17:29   ` [PATCH v5 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
@ 2022-01-20 22:48     ` Andrii Nakryiko
  0 siblings, 0 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-20 22:48 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Alexei Starovoitov, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Gabriele, Yonghong Song

On Thu, Jan 20, 2022 at 9:30 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a test for bpf iterator programs to make use of sleepable
> bpf helpers.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 19 ++++++++
>  .../selftests/bpf/progs/bpf_iter_task.c       | 47 +++++++++++++++++++
>  2 files changed, 66 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index b84f859b1267..f6fb4f95513d 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -138,6 +138,23 @@ static void test_task(void)
>         bpf_iter_task__destroy(skel);
>  }
>
> +static void test_task_sleepable(void)
> +{
> +       struct bpf_iter_task *skel;
> +
> +       skel = bpf_iter_task__open_and_load();
> +       if (CHECK(!skel, "bpf_iter_task__open_and_load",
> +                 "skeleton open_and_load failed\n"))

Please use ASSERT_OK_PTR() instead.

> +               return;
> +
> +       do_dummy_read(skel->progs.dump_task_sleepable);
> +
> +       ASSERT_GT(skel->bss->num_success_access_process_vm, 0,
> +                 "num_success_access_process_vm");
> +
> +       bpf_iter_task__destroy(skel);
> +}
> +
>  static void test_task_stack(void)
>  {
>         struct bpf_iter_task_stack *skel;
> @@ -1252,6 +1269,8 @@ void test_bpf_iter(void)
>                 test_bpf_map();
>         if (test__start_subtest("task"))
>                 test_task();
> +       if (test__start_subtest("task_sleepable"))
> +               test_task_sleepable();
>         if (test__start_subtest("task_stack"))
>                 test_task_stack();
>         if (test__start_subtest("task_file"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> index c86b93f33b32..3fa735af96f7 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
> @@ -2,6 +2,7 @@
>  /* Copyright (c) 2020 Facebook */
>  #include "bpf_iter.h"
>  #include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
>
>  char _license[] SEC("license") = "GPL";
>
> @@ -23,3 +24,49 @@ int dump_task(struct bpf_iter__task *ctx)
>         BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
>         return 0;
>  }
> +
> +int num_success_access_process_vm = 0;
> +
> +SEC("iter.s/task")
> +int dump_task_sleepable(struct bpf_iter__task *ctx)
> +{
> +       struct seq_file *seq = ctx->meta->seq;
> +       struct task_struct *task = ctx->task;
> +       static const char info[] = "    === END ===";
> +       struct pt_regs *regs;
> +       void *ptr;
> +       uint32_t user_data = 0;
> +       int numread;
> +
> +       if (task == (void *)0) {
> +               BPF_SEQ_PRINTF(seq, "%s\n", info);
> +               return 0;
> +       }
> +
> +       regs = (struct pt_regs *)bpf_task_pt_regs(task);
> +       if (regs == (void *)0) {
> +               BPF_SEQ_PRINTF(seq, "%s\n", info);
> +               return 0;
> +       }
> +       ptr = (void *)PT_REGS_IP(regs);
> +
> +       /* Try to read the contents of the task's instruction pointer from the
> +        * remote task's address space.
> +        */
> +       numread = bpf_access_process_vm(&user_data,
> +                                       sizeof(uint32_t),
> +                                       ptr,
> +                                       task,
> +                                       0);

nit: keep it on one line (up to 100 characters is ok)


> +       if (numread != sizeof(uint32_t)) {
> +               BPF_SEQ_PRINTF(seq, "%s\n", info);
> +               return 0;
> +       }
> +       ++num_success_access_process_vm;
> +
> +       if (ctx->meta->seq_num == 0)
> +               BPF_SEQ_PRINTF(seq, "    tgid      gid     data\n");
> +
> +       BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
> +       return 0;
> +}
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-20 22:45     ` Andrii Nakryiko
@ 2022-01-21  2:27       ` Alexei Starovoitov
  2022-01-21 17:20         ` Andrii Nakryiko
  0 siblings, 1 reply; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-21  2:27 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Gabriele, Yonghong Song

On Thu, Jan 20, 2022 at 2:46 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
a
> > + *             wrapper of **access_process_vm**\ ().
> > + *     Return
> > + *             The number of bytes written to the buffer, or a negative error
> > + *             in case of failure.
>
> wait, can it read less than *size* and return success?
>
> bpf_probe_read_kernel() returns:
>
> 0 on success, or a negative error in case of failure.
>
> Let's be consistent. Returning the number of read bytes makes more
> sense in cases when we don't know the amount of bytes to be actually
> read ahead of time (e.g., when reading zero-terminated strings).
>
> BTW, should we also add a C string reading helper as well, just like
> there is bpf_probe_read_user_str() and bpf_probe_read_user()?

That would be difficult. There is no suitable kernel api for that.

> Another thing, I think it's important to mention that this helper can
> be used only from sleepable BPF programs.
>
> And not to start the bikeshedding session, but we have
> bpf_copy_from_user(), wouldn't something like
> bpf_copy_from_user_{vm,process,remote}() be more in line and less
> surprising for BPF users. BTW, "access" implies writing just as much
> as reading, so using "access" in the sense of "read" seems wrong and
> confusing.

How about bpf_copy_from_user_task() ?
The task is the second to last argument, so the name fits ?
Especially if we call it this way it would be best to align
return codes with bpf_copy_from_user.
Adding memset() in case of failure is mandatory too.
I've missed this bit earlier.

The question is to decide what to do with
ret > 0 && ret < size condition.
Is it a failure and we should memset() the whole buffer and
return -EFAULT or memset only the leftover bytes and return 0?
I think the former is best to align with bpf_copy_from_user.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-21  2:27       ` Alexei Starovoitov
@ 2022-01-21 17:20         ` Andrii Nakryiko
  2022-01-21 17:41           ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-21 17:20 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Gabriele, Yonghong Song

On Thu, Jan 20, 2022 at 6:28 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Jan 20, 2022 at 2:46 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> a
> > > + *             wrapper of **access_process_vm**\ ().
> > > + *     Return
> > > + *             The number of bytes written to the buffer, or a negative error
> > > + *             in case of failure.
> >
> > wait, can it read less than *size* and return success?
> >
> > bpf_probe_read_kernel() returns:
> >
> > 0 on success, or a negative error in case of failure.
> >
> > Let's be consistent. Returning the number of read bytes makes more
> > sense in cases when we don't know the amount of bytes to be actually
> > read ahead of time (e.g., when reading zero-terminated strings).
> >
> > BTW, should we also add a C string reading helper as well, just like
> > there is bpf_probe_read_user_str() and bpf_probe_read_user()?
>
> That would be difficult. There is no suitable kernel api for that.

Ok, but maybe we can add it later. Otherwise it will be hard to
profile Python processes and such, because you most certainly will
need to read zero-terminated strings there.

>
> > Another thing, I think it's important to mention that this helper can
> > be used only from sleepable BPF programs.
> >
> > And not to start the bikeshedding session, but we have
> > bpf_copy_from_user(), wouldn't something like
> > bpf_copy_from_user_{vm,process,remote}() be more in line and less
> > surprising for BPF users. BTW, "access" implies writing just as much
> > as reading, so using "access" in the sense of "read" seems wrong and
> > confusing.
>
> How about bpf_copy_from_user_task() ?
> The task is the second to last argument, so the name fits ?

yeah, I like the name


> Especially if we call it this way it would be best to align
> return codes with bpf_copy_from_user.
> Adding memset() in case of failure is mandatory too.
> I've missed this bit earlier.

Yep, good catch! Seems like copy_from_user() currently returns amount
of bytes *not* read and memsets those unread bytes to zero. So for
efficiency we could probably memset only those that were read.

>
> The question is to decide what to do with
> ret > 0 && ret < size condition.
> Is it a failure and we should memset() the whole buffer and
> return -EFAULT or memset only the leftover bytes and return 0?
> I think the former is best to align with bpf_copy_from_user.

Yeah, I think all or nothing approach (either complete success and
zero return, or memset and error return) is best and most in line with
other similar helpers.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-21 17:20         ` Andrii Nakryiko
@ 2022-01-21 17:41           ` Kenny Yu
  2022-01-21 17:47             ` Alexei Starovoitov
  0 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 17:41 UTC (permalink / raw)
  To: andrii.nakryiko
  Cc: alexei.starovoitov, andrii, ast, bpf, daniel, kennyyu, phoenix1987, yhs

> > How about bpf_copy_from_user_task() ?
> > The task is the second to last argument, so the name fits ?
> 
> yeah, I like the name

I'll change the name to `bpf_copy_from_user_task`.

> > Especially if we call it this way it would be best to align
> > return codes with bpf_copy_from_user.
> > Adding memset() in case of failure is mandatory too.
> > I've missed this bit earlier.
>
> Yep, good catch! Seems like copy_from_user() currently returns amount
> of bytes *not* read and memsets those unread bytes to zero. So for
> efficiency we could probably memset only those that were read.
>
> > The question is to decide what to do with
> > ret > 0 && ret < size condition.
> > Is it a failure and we should memset() the whole buffer and
> > return -EFAULT or memset only the leftover bytes and return 0?
> > I think the former is best to align with bpf_copy_from_user.
>
> Yeah, I think all or nothing approach (either complete success and
> zero return, or memset and error return) is best and most in line with
> other similar helpers.

Thanks for the suggestions! I'll go with the all-or-nothing approach to
be consistent with `bpf_copy_from_user` and will make the following changes:

* Return value: returns 0 on success, or negative error on failure.
* If we had a partial read, we will memset the read bytes to 0 and return
  -EFAULT

> Another thing, I think it's important to mention that this helper can
> be used only from sleepable BPF programs.

Will add that to the docs.

> > That would be difficult. There is no suitable kernel api for that.
>
> Ok, but maybe we can add it later. Otherwise it will be hard to
> profiler Python processes and such, because you most certainly will
> need to read zero-terminated strings there.

I will NOT add a C string helper in this patch series, and I'll explore
how to add this in the future once this patch series is merged.

> > +       skel = bpf_iter_task__open_and_load();
> > +       if (CHECK(!skel, "bpf_iter_task__open_and_load",
> > +                 "skeleton open_and_load failed\n"))
>
> Please use ASSERT_OK_PTR() instead.

Will fix.

> > +       numread = bpf_access_process_vm(&user_data,
> > +                                       sizeof(uint32_t),
> > +                                       ptr,
> > +                                       task,
> > +                                       0);
>
> nit: keep it on one line (up to 100 characters is ok)

Will fix.

Thanks for the suggestions everyone!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-21 17:41           ` Kenny Yu
@ 2022-01-21 17:47             ` Alexei Starovoitov
  2022-01-21 18:30               ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-21 17:47 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Gabriele, Yonghong Song

On Fri, Jan 21, 2022 at 9:42 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> Thanks for the suggestions! I'll go with the all-or-nothing approach to
> be consistent with `bpf_copy_from_user` and will make the following changes:
>
> * Return value: returns 0 on success, or negative error on failure.
> * If we had a partial read, we will memset the read bytes to 0 and return
>   -EFAULT

Not read bytes, but all bytes.
copy_from_user zeros leftover for us, but access_process_vm doesn't
do this on partial read.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
  2022-01-21 17:47             ` Alexei Starovoitov
@ 2022-01-21 18:30               ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 18:30 UTC (permalink / raw)
  To: alexei.starovoitov
  Cc: andrii.nakryiko, andrii, ast, bpf, daniel, kennyyu, phoenix1987, yhs

> > * If we had a partial read, we will memset the read bytes to 0 and return
> >   -EFAULT
> 
> Not read bytes, but all bytes.
> copy_from_user zeros leftover for us, but access_process_vm doesn't
> do this on partial read.

I'll memset all bytes on error. Thanks for the suggestion!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (6 preceding siblings ...)
  2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
@ 2022-01-21 19:30 ` Kenny Yu
  2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
                     ` (2 more replies)
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
  8 siblings, 3 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 19:30 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This patch series makes the following changes:
* Adds a new bpf helper `bpf_copy_from_user_task` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v5:
* Rename `bpf_access_process_vm` to `bpf_copy_from_user_task`.
* Change return value to be all-or-nothing. If we get a partial read,
  memset all bytes to 0 and return -EFAULT.
* Add to docs that the helper can only be used by sleepable BPF programs.
* Fix nits in selftests.

Changes since v4:
* Make `flags` into u64.
* Use `user_ptr` arg name to be consistent with `bpf_copy_from_user`.
* Add an extra check in selftests to verify access_process_vm calls
  succeeded.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (3):
  bpf: Add bpf_copy_from_user_task() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 10 +++++
 kernel/bpf/bpf_iter.c                         | 20 ++++++---
 kernel/bpf/helpers.c                          | 31 +++++++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 10 +++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 18 ++++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 43 +++++++++++++++++++
 9 files changed, 131 insertions(+), 5 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
@ 2022-01-21 19:30   ` Kenny Yu
  2022-01-21 19:53     ` Andrii Nakryiko
  2022-01-21 19:30   ` [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
  2022-01-21 19:30   ` [PATCH v6 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 19:30 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a helper for bpf programs to read the memory of other
tasks. This also adds the ability for bpf iterator programs to
be sleepable.

This changes `bpf_iter_run_prog` to use the appropriate synchronization for
sleepable bpf programs. With sleepable bpf iterator programs, we can no
longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 10 ++++++++++
 kernel/bpf/bpf_iter.c          | 20 ++++++++++++++-----
 kernel/bpf/helpers.c           | 35 ++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 6 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80e3387ea3af..5917883e528b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2229,6 +2229,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fe2272defcd9..d82d9423874d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5049,6 +5049,15 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5248,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(copy_from_user_task),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
-	ret = bpf_prog_run(prog, ctx);
-	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock_trace();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock();
+	}
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..3bc37016fad0 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,40 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
+	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
+{
+	int ret;
+
+	/* flags is not used yet */
+	if (flags)
+		return -EINVAL;
+
+	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+	if (ret >= 0) {
+		if (ret == size)
+			ret = 0;
+		else {
+			/* If partial read, clear all bytes and return error */
+			memset(dst, 0, size);
+			ret = -EFAULT;
+		}
+	}
+	return ret;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_task_proto = {
+	.func		= bpf_copy_from_user_task,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_BTF_ID,
+	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg5_type	= ARG_ANYTHING
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..d07f568646cb 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_task_stack_proto;
 	case BPF_FUNC_copy_from_user:
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+	case BPF_FUNC_copy_from_user_task:
+		return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_per_cpu_ptr:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fe2272defcd9..d82d9423874d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5049,6 +5049,15 @@ union bpf_attr {
  *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and store the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5239,6 +5248,7 @@ union bpf_attr {
 	FN(get_func_arg_cnt),		\
 	FN(get_retval),			\
 	FN(set_retval),			\
+	FN(copy_from_user_task),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
  2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
@ 2022-01-21 19:30   ` Kenny Yu
  2022-01-21 19:54     ` Andrii Nakryiko
  2022-01-21 19:30   ` [PATCH v6 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 19:30 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fc6d530e20db..ea7149606e67 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8607,6 +8607,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v6 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
  2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
  2022-01-21 19:30   ` [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-21 19:30   ` Kenny Yu
  2022-01-21 19:55     ` Andrii Nakryiko
  2 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-21 19:30 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 20 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..5142a7d130b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,24 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+		  "num_expected_failure_copy_from_user_task");
+	ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+		  "num_success_copy_from_user_task");
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..d22741272692 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_success_copy_from_user_task = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int ret;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Read an invalid pointer and ensure we get an error */
+	ptr = NULL;
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		++num_expected_failure_copy_from_user_task;
+	} else {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	++num_success_copy_from_user_task;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
@ 2022-01-21 19:53     ` Andrii Nakryiko
  2022-01-21 20:04       ` Yonghong Song
  2022-01-22  9:58       ` Gabriele
  0 siblings, 2 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-21 19:53 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Alexei Starovoitov, Gabriele

On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a helper for bpf programs to read the memory of other
> tasks. This also adds the ability for bpf iterator programs to
> be sleepable.
>
> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
>
> As an example use case at Meta, we are using a bpf task iterator program
> and this new helper to print C++ async stack traces for all threads of
> a given process.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       | 10 ++++++++++
>  kernel/bpf/bpf_iter.c          | 20 ++++++++++++++-----
>  kernel/bpf/helpers.c           | 35 ++++++++++++++++++++++++++++++++++
>  kernel/trace/bpf_trace.c       |  2 ++
>  tools/include/uapi/linux/bpf.h | 10 ++++++++++
>  6 files changed, 73 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 80e3387ea3af..5917883e528b 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2229,6 +2229,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>  extern const struct bpf_func_proto bpf_find_vma_proto;
>  extern const struct bpf_func_proto bpf_loop_proto;
>  extern const struct bpf_func_proto bpf_strncmp_proto;
> +extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
>
>  const struct bpf_func_proto *tracing_prog_func_proto(
>    enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index fe2272defcd9..d82d9423874d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,15 @@ union bpf_attr {
>   *             This helper is currently supported by cgroup programs only.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> + *     Description
> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> + *             address space, and stores the data in *dst*. *flags* is not
> + *             used yet and is provided for future extensibility. This helper
> + *             can only be used by sleepable programs.

"On error dst buffer is zeroed out."? This is an explicit guarantee.

> + *     Return
> + *             0 on success, or a negative error in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5239,6 +5248,7 @@ union bpf_attr {
>         FN(get_func_arg_cnt),           \
>         FN(get_retval),                 \
>         FN(set_retval),                 \
> +       FN(copy_from_user_task),        \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index b7aef5b3416d..110029ede71e 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -5,6 +5,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
> +#include <linux/rcupdate_trace.h>
>
>  struct bpf_iter_target_info {
>         struct list_head list;
> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  {
>         int ret;
>
> -       rcu_read_lock();
> -       migrate_disable();
> -       ret = bpf_prog_run(prog, ctx);
> -       migrate_enable();
> -       rcu_read_unlock();
> +       if (prog->aux->sleepable) {
> +               rcu_read_lock_trace();
> +               migrate_disable();
> +               might_fault();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock_trace();
> +       } else {
> +               rcu_read_lock();
> +               migrate_disable();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock();
> +       }
>

I think this sleepable bpf_iter change deserves its own patch. It has
nothing to do with bpf_copy_from_user_task() helper.

>         /* bpf program can only return 0 or 1:
>          *  0 : okay
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 01cfdf40c838..3bc37016fad0 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -16,6 +16,7 @@
>  #include <linux/pid_namespace.h>
>  #include <linux/proc_ns.h>
>  #include <linux/security.h>
> +#include <linux/btf_ids.h>
>
>  #include "../../lib/kstrtox.h"
>
> @@ -671,6 +672,40 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
>         .arg3_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
> +          const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
> +{
> +       int ret;
> +
> +       /* flags is not used yet */
> +       if (flags)
> +               return -EINVAL;
> +
> +       ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> +       if (ret >= 0) {
> +               if (ret == size)
> +                       ret = 0;

is there a point in calling access_process_vm() with size == 0? It
would validate that get_task_mm() succeeds, but that's pretty much it?
So maybe instead just exit early if size is zero? It will also be less
convoluted logic:

if (size == 0)
    return 0;
if (access_process_vm(...)) {
    memset(0);
    return -EFAULT;
}
return 0;

> +               else {
> +                       /* If partial read, clear all bytes and return error */
> +                       memset(dst, 0, size);
> +                       ret = -EFAULT;
> +               }
> +       }
> +       return ret;
> +}
> +

[...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-21 19:30   ` [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-21 19:54     ` Andrii Nakryiko
  0 siblings, 0 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-21 19:54 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Alexei Starovoitov, Gabriele

On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a new bpf section "iter.s" to allow bpf iterator programs to
> be sleepable.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  tools/lib/bpf/libbpf.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index fc6d530e20db..ea7149606e67 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -8607,6 +8607,7 @@ static const struct bpf_sec_def section_defs[] = {
>         SEC_DEF("lsm/",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
>         SEC_DEF("lsm.s/",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
>         SEC_DEF("iter/",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
> +       SEC_DEF("iter.s/",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
>         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
>         SEC_DEF("xdp_devmap/",          XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
>         SEC_DEF("xdp_cpumap/",          XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 3/3] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-21 19:30   ` [PATCH v6 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
@ 2022-01-21 19:55     ` Andrii Nakryiko
  0 siblings, 0 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-21 19:55 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Alexei Starovoitov, Gabriele

On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a test for bpf iterator programs to make use of sleepable
> bpf helpers.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  .../selftests/bpf/prog_tests/bpf_iter.c       | 20 +++++++
>  .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
>  2 files changed, 74 insertions(+)
>

[...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 19:53     ` Andrii Nakryiko
@ 2022-01-21 20:04       ` Yonghong Song
  2022-01-21 20:07         ` Andrii Nakryiko
  2022-01-22  9:58       ` Gabriele
  1 sibling, 1 reply; 62+ messages in thread
From: Yonghong Song @ 2022-01-21 20:04 UTC (permalink / raw)
  To: Andrii Nakryiko, Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Alexei Starovoitov, Gabriele



On 1/21/22 11:53 AM, Andrii Nakryiko wrote:
> On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
>>
>> This adds a helper for bpf programs to read the memory of other
>> tasks. This also adds the ability for bpf iterator programs to
>> be sleepable.
>>
>> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
>> sleepable bpf programs. With sleepable bpf iterator programs, we can no
>> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
>> to protect the bpf program.
>>
>> As an example use case at Meta, we are using a bpf task iterator program
>> and this new helper to print C++ async stack traces for all threads of
>> a given process.
>>
>> Signed-off-by: Kenny Yu <kennyyu@fb.com>
>> ---
>>   include/linux/bpf.h            |  1 +
>>   include/uapi/linux/bpf.h       | 10 ++++++++++
>>   kernel/bpf/bpf_iter.c          | 20 ++++++++++++++-----
>>   kernel/bpf/helpers.c           | 35 ++++++++++++++++++++++++++++++++++
>>   kernel/trace/bpf_trace.c       |  2 ++
>>   tools/include/uapi/linux/bpf.h | 10 ++++++++++
>>   6 files changed, 73 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 80e3387ea3af..5917883e528b 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -2229,6 +2229,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>>   extern const struct bpf_func_proto bpf_find_vma_proto;
>>   extern const struct bpf_func_proto bpf_loop_proto;
>>   extern const struct bpf_func_proto bpf_strncmp_proto;
>> +extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
>>
>>   const struct bpf_func_proto *tracing_prog_func_proto(
>>     enum bpf_func_id func_id, const struct bpf_prog *prog);
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index fe2272defcd9..d82d9423874d 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -5049,6 +5049,15 @@ union bpf_attr {
>>    *             This helper is currently supported by cgroup programs only.
>>    *     Return
>>    *             0 on success, or a negative error in case of failure.
>> + *
>> + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
>> + *     Description
>> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
>> + *             address space, and stores the data in *dst*. *flags* is not
>> + *             used yet and is provided for future extensibility. This helper
>> + *             can only be used by sleepable programs.
> 
> "On error dst buffer is zeroed out."? This is an explicit guarantee.
> 
>> + *     Return
>> + *             0 on success, or a negative error in case of failure.
>>    */
>>   #define __BPF_FUNC_MAPPER(FN)          \
>>          FN(unspec),                     \
>> @@ -5239,6 +5248,7 @@ union bpf_attr {
>>          FN(get_func_arg_cnt),           \
>>          FN(get_retval),                 \
>>          FN(set_retval),                 \
>> +       FN(copy_from_user_task),        \
>>          /* */
>>
>>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>> index b7aef5b3416d..110029ede71e 100644
>> --- a/kernel/bpf/bpf_iter.c
>> +++ b/kernel/bpf/bpf_iter.c
>> @@ -5,6 +5,7 @@
>>   #include <linux/anon_inodes.h>
>>   #include <linux/filter.h>
>>   #include <linux/bpf.h>
>> +#include <linux/rcupdate_trace.h>
>>
>>   struct bpf_iter_target_info {
>>          struct list_head list;
>> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>>   {
>>          int ret;
>>
>> -       rcu_read_lock();
>> -       migrate_disable();
>> -       ret = bpf_prog_run(prog, ctx);
>> -       migrate_enable();
>> -       rcu_read_unlock();
>> +       if (prog->aux->sleepable) {
>> +               rcu_read_lock_trace();
>> +               migrate_disable();
>> +               might_fault();
>> +               ret = bpf_prog_run(prog, ctx);
>> +               migrate_enable();
>> +               rcu_read_unlock_trace();
>> +       } else {
>> +               rcu_read_lock();
>> +               migrate_disable();
>> +               ret = bpf_prog_run(prog, ctx);
>> +               migrate_enable();
>> +               rcu_read_unlock();
>> +       }
>>
> 
> I think this sleepable bpf_iter change deserves its own patch. It has
> nothing to do with bpf_copy_from_user_task() helper.

Without the above change, using bpf_copy_from_user_task() will trigger 
rcu warning and may produce incorrect result. One option is to put
the above in a preparation patch before introducing 
bpf_copy_from_user_task() so we won't have bisecting issues.

> 
>>          /* bpf program can only return 0 or 1:
>>           *  0 : okay
[...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 20:04       ` Yonghong Song
@ 2022-01-21 20:07         ` Andrii Nakryiko
  2022-01-21 21:15           ` Yonghong Song
  0 siblings, 1 reply; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-21 20:07 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Alexei Starovoitov, Gabriele

On Fri, Jan 21, 2022 at 12:04 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 1/21/22 11:53 AM, Andrii Nakryiko wrote:
> > On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
> >>
> >> This adds a helper for bpf programs to read the memory of other
> >> tasks. This also adds the ability for bpf iterator programs to
> >> be sleepable.
> >>
> >> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> >> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> >> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> >> to protect the bpf program.
> >>
> >> As an example use case at Meta, we are using a bpf task iterator program
> >> and this new helper to print C++ async stack traces for all threads of
> >> a given process.
> >>
> >> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> >> ---
> >>   include/linux/bpf.h            |  1 +
> >>   include/uapi/linux/bpf.h       | 10 ++++++++++
> >>   kernel/bpf/bpf_iter.c          | 20 ++++++++++++++-----
> >>   kernel/bpf/helpers.c           | 35 ++++++++++++++++++++++++++++++++++
> >>   kernel/trace/bpf_trace.c       |  2 ++
> >>   tools/include/uapi/linux/bpf.h | 10 ++++++++++
> >>   6 files changed, 73 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index 80e3387ea3af..5917883e528b 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -2229,6 +2229,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
> >>   extern const struct bpf_func_proto bpf_find_vma_proto;
> >>   extern const struct bpf_func_proto bpf_loop_proto;
> >>   extern const struct bpf_func_proto bpf_strncmp_proto;
> >> +extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
> >>
> >>   const struct bpf_func_proto *tracing_prog_func_proto(
> >>     enum bpf_func_id func_id, const struct bpf_prog *prog);
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index fe2272defcd9..d82d9423874d 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -5049,6 +5049,15 @@ union bpf_attr {
> >>    *             This helper is currently supported by cgroup programs only.
> >>    *     Return
> >>    *             0 on success, or a negative error in case of failure.
> >> + *
> >> + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> >> + *     Description
> >> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> >> + *             address space, and stores the data in *dst*. *flags* is not
> >> + *             used yet and is provided for future extensibility. This helper
> >> + *             can only be used by sleepable programs.
> >
> > "On error dst buffer is zeroed out."? This is an explicit guarantee.
> >
> >> + *     Return
> >> + *             0 on success, or a negative error in case of failure.
> >>    */
> >>   #define __BPF_FUNC_MAPPER(FN)          \
> >>          FN(unspec),                     \
> >> @@ -5239,6 +5248,7 @@ union bpf_attr {
> >>          FN(get_func_arg_cnt),           \
> >>          FN(get_retval),                 \
> >>          FN(set_retval),                 \
> >> +       FN(copy_from_user_task),        \
> >>          /* */
> >>
> >>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> >> index b7aef5b3416d..110029ede71e 100644
> >> --- a/kernel/bpf/bpf_iter.c
> >> +++ b/kernel/bpf/bpf_iter.c
> >> @@ -5,6 +5,7 @@
> >>   #include <linux/anon_inodes.h>
> >>   #include <linux/filter.h>
> >>   #include <linux/bpf.h>
> >> +#include <linux/rcupdate_trace.h>
> >>
> >>   struct bpf_iter_target_info {
> >>          struct list_head list;
> >> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
> >>   {
> >>          int ret;
> >>
> >> -       rcu_read_lock();
> >> -       migrate_disable();
> >> -       ret = bpf_prog_run(prog, ctx);
> >> -       migrate_enable();
> >> -       rcu_read_unlock();
> >> +       if (prog->aux->sleepable) {
> >> +               rcu_read_lock_trace();
> >> +               migrate_disable();
> >> +               might_fault();
> >> +               ret = bpf_prog_run(prog, ctx);
> >> +               migrate_enable();
> >> +               rcu_read_unlock_trace();
> >> +       } else {
> >> +               rcu_read_lock();
> >> +               migrate_disable();
> >> +               ret = bpf_prog_run(prog, ctx);
> >> +               migrate_enable();
> >> +               rcu_read_unlock();
> >> +       }
> >>
> >
> > I think this sleepable bpf_iter change deserves its own patch. It has
> > nothing to do with bpf_copy_from_user_task() helper.
>
> Without the above change, using bpf_copy_from_user_task() will trigger
> rcu warning and may produce incorrect result. One option is to put
> the above in a preparation patch before introducing
> bpf_copy_from_user_task() so we won't have bisecting issues.

Sure, patch #1 for sleepable bpf_iter, patch #2 for the helper? I
mean, it's not a big deal, but both seem to deserve their own focused
patches.

>
> >
> >>          /* bpf program can only return 0 or 1:
> >>           *  0 : okay
> [...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 20:07         ` Andrii Nakryiko
@ 2022-01-21 21:15           ` Yonghong Song
  2022-01-24 17:30             ` Kenny Yu
  0 siblings, 1 reply; 62+ messages in thread
From: Yonghong Song @ 2022-01-21 21:15 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Alexei Starovoitov, Gabriele



On 1/21/22 12:07 PM, Andrii Nakryiko wrote:
> On Fri, Jan 21, 2022 at 12:04 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 1/21/22 11:53 AM, Andrii Nakryiko wrote:
>>> On Fri, Jan 21, 2022 at 11:31 AM Kenny Yu <kennyyu@fb.com> wrote:
>>>>
>>>> This adds a helper for bpf programs to read the memory of other
>>>> tasks. This also adds the ability for bpf iterator programs to
>>>> be sleepable.
>>>>
>>>> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
>>>> sleepable bpf programs. With sleepable bpf iterator programs, we can no
>>>> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
>>>> to protect the bpf program.
>>>>
>>>> As an example use case at Meta, we are using a bpf task iterator program
>>>> and this new helper to print C++ async stack traces for all threads of
>>>> a given process.
>>>>
>>>> Signed-off-by: Kenny Yu <kennyyu@fb.com>
>>>> ---
>>>>    include/linux/bpf.h            |  1 +
>>>>    include/uapi/linux/bpf.h       | 10 ++++++++++
>>>>    kernel/bpf/bpf_iter.c          | 20 ++++++++++++++-----
>>>>    kernel/bpf/helpers.c           | 35 ++++++++++++++++++++++++++++++++++
>>>>    kernel/trace/bpf_trace.c       |  2 ++
>>>>    tools/include/uapi/linux/bpf.h | 10 ++++++++++
>>>>    6 files changed, 73 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>>> index 80e3387ea3af..5917883e528b 100644
>>>> --- a/include/linux/bpf.h
>>>> +++ b/include/linux/bpf.h
>>>> @@ -2229,6 +2229,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>>>>    extern const struct bpf_func_proto bpf_find_vma_proto;
>>>>    extern const struct bpf_func_proto bpf_loop_proto;
>>>>    extern const struct bpf_func_proto bpf_strncmp_proto;
>>>> +extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
>>>>
>>>>    const struct bpf_func_proto *tracing_prog_func_proto(
>>>>      enum bpf_func_id func_id, const struct bpf_prog *prog);
>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>> index fe2272defcd9..d82d9423874d 100644
>>>> --- a/include/uapi/linux/bpf.h
>>>> +++ b/include/uapi/linux/bpf.h
>>>> @@ -5049,6 +5049,15 @@ union bpf_attr {
>>>>     *             This helper is currently supported by cgroup programs only.
>>>>     *     Return
>>>>     *             0 on success, or a negative error in case of failure.
>>>> + *
>>>> + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
>>>> + *     Description
>>>> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
>>>> + *             address space, and stores the data in *dst*. *flags* is not
>>>> + *             used yet and is provided for future extensibility. This helper
>>>> + *             can only be used by sleepable programs.
>>>
>>> "On error dst buffer is zeroed out."? This is an explicit guarantee.
>>>
>>>> + *     Return
>>>> + *             0 on success, or a negative error in case of failure.
>>>>     */
>>>>    #define __BPF_FUNC_MAPPER(FN)          \
>>>>           FN(unspec),                     \
>>>> @@ -5239,6 +5248,7 @@ union bpf_attr {
>>>>           FN(get_func_arg_cnt),           \
>>>>           FN(get_retval),                 \
>>>>           FN(set_retval),                 \
>>>> +       FN(copy_from_user_task),        \
>>>>           /* */
>>>>
>>>>    /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>>>> index b7aef5b3416d..110029ede71e 100644
>>>> --- a/kernel/bpf/bpf_iter.c
>>>> +++ b/kernel/bpf/bpf_iter.c
>>>> @@ -5,6 +5,7 @@
>>>>    #include <linux/anon_inodes.h>
>>>>    #include <linux/filter.h>
>>>>    #include <linux/bpf.h>
>>>> +#include <linux/rcupdate_trace.h>
>>>>
>>>>    struct bpf_iter_target_info {
>>>>           struct list_head list;
>>>> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>>>>    {
>>>>           int ret;
>>>>
>>>> -       rcu_read_lock();
>>>> -       migrate_disable();
>>>> -       ret = bpf_prog_run(prog, ctx);
>>>> -       migrate_enable();
>>>> -       rcu_read_unlock();
>>>> +       if (prog->aux->sleepable) {
>>>> +               rcu_read_lock_trace();
>>>> +               migrate_disable();
>>>> +               might_fault();
>>>> +               ret = bpf_prog_run(prog, ctx);
>>>> +               migrate_enable();
>>>> +               rcu_read_unlock_trace();
>>>> +       } else {
>>>> +               rcu_read_lock();
>>>> +               migrate_disable();
>>>> +               ret = bpf_prog_run(prog, ctx);
>>>> +               migrate_enable();
>>>> +               rcu_read_unlock();
>>>> +       }
>>>>
>>>
>>> I think this sleepable bpf_iter change deserves its own patch. It has
>>> nothing to do with bpf_copy_from_user_task() helper.
>>
>> Without the above change, using bpf_copy_from_user_task() will trigger
>> rcu warning and may produce incorrect result. One option is to put
>> the above in a preparation patch before introducing
>> bpf_copy_from_user_task() so we won't have bisecting issues.
> 
> Sure, patch #1 for sleepable bpf_iter, patch #2 for the helper? I
> mean, it's not a big deal, but both seem to deserve their own focused
> patches.

okay with me.

> 
>>
>>>
>>>>           /* bpf program can only return 0 or 1:
>>>>            *  0 : okay
>> [...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 19:53     ` Andrii Nakryiko
  2022-01-21 20:04       ` Yonghong Song
@ 2022-01-22  9:58       ` Gabriele
  2022-01-22 10:08         ` Gabriele
  1 sibling, 1 reply; 62+ messages in thread
From: Gabriele @ 2022-01-22  9:58 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Yonghong Song, Alexei Starovoitov

On Fri, 21 Jan 2022 at 19:53, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> "On error dst buffer is zeroed out."? This is an explicit guarantee.
>

I appreciate that existing helpers already do this and it's good to
follow suit for consistency, but what is the rationale behind zeroing
memory on failure? Can't this step be skipped for performance and
leave it up to the callee to perform if they need it?

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-22  9:58       ` Gabriele
@ 2022-01-22 10:08         ` Gabriele
  0 siblings, 0 replies; 62+ messages in thread
From: Gabriele @ 2022-01-22 10:08 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Yonghong Song, Alexei Starovoitov

s/callee/caller

On Sat, 22 Jan 2022 at 09:58, Gabriele <phoenix1987@gmail.com> wrote:
>
> On Fri, 21 Jan 2022 at 19:53, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > "On error dst buffer is zeroed out."? This is an explicit guarantee.
> >
>
> I appreciate that existing helpers already do this and it's good to
> follow suit for consistency, but what is the rationale behind zeroing
> memory on failure? Can't this step be skipped for performance and
> leave it up to the callee to perform if they need it?



-- 
"Egli è scritto in lingua matematica, e i caratteri son triangoli,
cerchi, ed altre figure
geometriche, senza i quali mezzi è impossibile a intenderne umanamente parola;
senza questi è un aggirarsi vanamente per un oscuro laberinto."

-- G. Galilei, Il saggiatore.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper
  2022-01-21 21:15           ` Yonghong Song
@ 2022-01-24 17:30             ` Kenny Yu
  0 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 17:30 UTC (permalink / raw)
  To: yhs
  Cc: alexei.starovoitov, andrii.nakryiko, andrii, ast, bpf, daniel,
	kennyyu, phoenix1987

> "On error dst buffer is zeroed out."? This is an explicit guarantee.

Will add to the docs.

> is there a point in calling access_process_vm() with size == 0? It
> would validate that get_task_mm() succeeds, but that's pretty much it?
> So maybe instead just exit early if size is zero? It will be also less
> convoluted logic:
> 
> if (size == 0)
>     return 0;
> if (access_process_vm(...)) {
>     memset(0);
>     return -EFAULT;
> }
> return 0;

Will do an explicit check for `size == 0`.
Note that we still need to check if the return value of
`access_process_vm` == `size` to see if we have a partial read.

> > Without the above change, using bpf_copy_from_user_task() will trigger
> > rcu warning and may produce incorrect result. One option is to put
> > the above in a preparation patch before introducing
> > bpf_copy_from_user_task() so we won't have bisecting issues.
>
> Sure, patch #1 for sleepable bpf_iter, patch #2 for the helper? I
> mean, it's not a big deal, but both seem to deserve their own focused
> patches.

I'll split this into 2 patches and place the bpf_iter patch first.

> I appreciate that existing helpers already do this and it's good to
> follow suit for consistency, but what is the rationale behind zeroing
> memory on failure?

I believe the intent behind this is for security/privacy reasons.
On error, we don't want to unintentionally leak partially read data.

Thanks everyone for the suggestions!

Kenny

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and sleepable bpf iterator programs
  2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
                   ` (7 preceding siblings ...)
  2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
@ 2022-01-24 18:53 ` Kenny Yu
  2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
                     ` (4 more replies)
  8 siblings, 5 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 18:53 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This patch series makes the following changes:
* Adds a new bpf helper `bpf_copy_from_user_task` to read user space
  memory from a different task.
* Adds the ability to create sleepable bpf iterator programs.

As an example of how this will be used, at Meta we are using bpf task
iterator programs and this new bpf helper to read C++ async stack traces of
a running process for debugging C++ binaries in production.

Changes since v6:
* Split first patch into two patches: first patch to add support
  for bpf iterators to use sleepable helpers, and the second to add
  the new bpf helper.
* Simplify implementation of `bpf_copy_from_user_task` based on feedback.
* Add to docs that the destination buffer will be zeroed on error.

Changes since v5:
* Rename `bpf_access_process_vm` to `bpf_copy_from_user_task`.
* Change return value to be all-or-nothing. If we get a partial read,
  memset all bytes to 0 and return -EFAULT.
* Add to docs that the helper can only be used by sleepable BPF programs.
* Fix nits in selftests.

Changes since v4:
* Make `flags` into u64.
* Use `user_ptr` arg name to be consistent with `bpf_copy_from_user`.
* Add an extra check in selftests to verify access_process_vm calls
  succeeded.

Changes since v3:
* Check if `flags` is 0 and return -EINVAL if not.
* Rebase on latest bpf-next branch and fix merge conflicts.

Changes since v2:
* Reorder arguments in `bpf_access_process_vm` to match existing related
  bpf helpers (e.g. `bpf_probe_read_kernel`, `bpf_probe_read_user`,
  `bpf_copy_from_user`).
* `flags` argument is provided for future extensibility and is not
  currently used, and we always invoke `access_process_vm` with no flags.
* Merge bpf helper patch and `bpf_iter_run_prog` patch together for better
  bisectability in case of failures.
* Clean up formatting and comments in selftests.

Changes since v1:
* Fixed "Invalid wait context" issue in `bpf_iter_run_prog` by using
  `rcu_read_lock_trace()` for sleepable bpf iterator programs.

Kenny Yu (4):
  bpf: Add support for bpf iterator programs to use sleepable helpers
  bpf: Add bpf_copy_from_user_task() helper
  libbpf: Add "iter.s" section for sleepable bpf iterator programs
  selftests/bpf: Add test for sleepable bpf iterator programs

 include/linux/bpf.h                           |  1 +
 include/uapi/linux/bpf.h                      | 11 ++++
 kernel/bpf/bpf_iter.c                         | 20 +++++--
 kernel/bpf/helpers.c                          | 34 ++++++++++++
 kernel/trace/bpf_trace.c                      |  2 +
 tools/include/uapi/linux/bpf.h                | 11 ++++
 tools/lib/bpf/libbpf.c                        |  1 +
 .../selftests/bpf/prog_tests/bpf_iter.c       | 20 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 9 files changed, 149 insertions(+), 5 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 62+ messages in thread

* [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
@ 2022-01-24 18:54   ` Kenny Yu
  2022-01-24 22:19     ` Andrii Nakryiko
  2022-01-24 18:54   ` [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 18:54 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This patch allows bpf iterator programs to use sleepable helpers by
changing `bpf_iter_run_prog` to use the appropriate synchronization.
With sleepable bpf iterator programs, we can no longer use
`rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
to protect the bpf program.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 kernel/bpf/bpf_iter.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b7aef5b3416d..110029ede71e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -5,6 +5,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/rcupdate_trace.h>
 
 struct bpf_iter_target_info {
 	struct list_head list;
@@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
 	int ret;
 
-	rcu_read_lock();
-	migrate_disable();
-	ret = bpf_prog_run(prog, ctx);
-	migrate_enable();
-	rcu_read_unlock();
+	if (prog->aux->sleepable) {
+		rcu_read_lock_trace();
+		migrate_disable();
+		might_fault();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock_trace();
+	} else {
+		rcu_read_lock();
+		migrate_disable();
+		ret = bpf_prog_run(prog, ctx);
+		migrate_enable();
+		rcu_read_unlock();
+	}
 
 	/* bpf program can only return 0 or 1:
 	 *  0 : okay
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
  2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
@ 2022-01-24 18:54   ` Kenny Yu
  2022-01-24 22:18     ` Andrii Nakryiko
  2022-01-24 18:54   ` [PATCH v7 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 18:54 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a helper for bpf programs to read the memory of other
tasks.

As an example use case at Meta, we are using a bpf task iterator program
and this new helper to print C++ async stack traces for all threads of
a given process.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/helpers.c           | 34 ++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 5 files changed, 59 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e8ec8d2f2fe3..4f13642c6a47 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2238,6 +2238,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_strncmp_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 16a7574292a5..4a2f7041ebae 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5076,6 +5076,16 @@ union bpf_attr {
  *		associated to *xdp_md*, at *offset*.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure. On error
+ *		*dst* buffer is zeroed out.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5269,6 +5279,7 @@ union bpf_attr {
 	FN(xdp_get_buff_len),		\
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
+	FN(copy_from_user_task),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 01cfdf40c838..2688b0064289 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/btf_ids.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -671,6 +672,39 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
+	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
+{
+	int ret;
+
+	/* flags is not used yet */
+	if (flags)
+		return -EINVAL;
+
+	if (size == 0)
+		return 0;
+
+	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
+	if (ret == size)
+		return 0;
+
+	memset(dst, 0, size);
+	/* Return -EFAULT for partial read */
+	return (ret < 0) ? ret : -EFAULT;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_task_proto = {
+	.func		= bpf_copy_from_user_task,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_BTF_ID,
+	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+	.arg5_type	= ARG_ANYTHING
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
 	if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 06a9e220069e..a2024ba32a20 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1235,6 +1235,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_task_stack_proto;
 	case BPF_FUNC_copy_from_user:
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+	case BPF_FUNC_copy_from_user_task:
+		return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_per_cpu_ptr:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 16a7574292a5..4a2f7041ebae 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5076,6 +5076,16 @@ union bpf_attr {
  *		associated to *xdp_md*, at *offset*.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ *	Description
+ *		Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ *		address space, and stores the data in *dst*. *flags* is not
+ *		used yet and is provided for future extensibility. This helper
+ *		can only be used by sleepable programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure. On error
+ *		*dst* buffer is zeroed out.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5269,6 +5279,7 @@ union bpf_attr {
 	FN(xdp_get_buff_len),		\
 	FN(xdp_load_bytes),		\
 	FN(xdp_store_bytes),		\
+	FN(copy_from_user_task),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v7 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
  2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
  2022-01-24 18:54   ` [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
@ 2022-01-24 18:54   ` Kenny Yu
  2022-01-24 18:54   ` [PATCH v7 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
  2022-01-25  4:10   ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " patchwork-bot+netdevbpf
  4 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 18:54 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a new bpf section "iter.s" to allow bpf iterator programs to
be sleepable.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/libbpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a8c750373ad5..57894f125df3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8612,6 +8612,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
 	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH v7 bpf-next 4/4] selftests/bpf: Add test for sleepable bpf iterator programs
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
                     ` (2 preceding siblings ...)
  2022-01-24 18:54   ` [PATCH v7 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
@ 2022-01-24 18:54   ` Kenny Yu
  2022-01-25  4:10   ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " patchwork-bot+netdevbpf
  4 siblings, 0 replies; 62+ messages in thread
From: Kenny Yu @ 2022-01-24 18:54 UTC (permalink / raw)
  To: kennyyu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 20 +++++++
 .../selftests/bpf/progs/bpf_iter_task.c       | 54 +++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..5142a7d130b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,24 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+		  "num_expected_failure_copy_from_user_task");
+	ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+		  "num_success_copy_from_user_task");
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..d22741272692 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_success_copy_from_user_task = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int ret;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Read an invalid pointer and ensure we get an error */
+	ptr = NULL;
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		++num_expected_failure_copy_from_user_task;
+	} else {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	++num_success_copy_from_user_task;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper
  2022-01-24 18:54   ` [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
@ 2022-01-24 22:18     ` Andrii Nakryiko
  2022-01-25  4:06       ` Alexei Starovoitov
  0 siblings, 1 reply; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-24 22:18 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Alexei Starovoitov, Gabriele

On Mon, Jan 24, 2022 at 10:54 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a helper for bpf programs to read the memory of other
> tasks.
>
> As an example use case at Meta, we are using a bpf task iterator program
> and this new helper to print C++ async stack traces for all threads of
> a given process.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---

LGTM.

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       | 11 +++++++++++
>  kernel/bpf/helpers.c           | 34 ++++++++++++++++++++++++++++++++++
>  kernel/trace/bpf_trace.c       |  2 ++
>  tools/include/uapi/linux/bpf.h | 11 +++++++++++
>  5 files changed, 59 insertions(+)
>

[...]

> +       ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> +       if (ret == size)
> +               return 0;
> +
> +       memset(dst, 0, size);
> +       /* Return -EFAULT for partial read */
> +       return (ret < 0) ? ret : -EFAULT;

nit: unnecessary ()

> +}
> +
> +const struct bpf_func_proto bpf_copy_from_user_task_proto = {
> +       .func           = bpf_copy_from_user_task,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
> +       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
> +       .arg3_type      = ARG_ANYTHING,
> +       .arg4_type      = ARG_PTR_TO_BTF_ID,
> +       .arg4_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
> +       .arg5_type      = ARG_ANYTHING
> +};
> +

[...]

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers
  2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
@ 2022-01-24 22:19     ` Andrii Nakryiko
  0 siblings, 0 replies; 62+ messages in thread
From: Andrii Nakryiko @ 2022-01-24 22:19 UTC (permalink / raw)
  To: Kenny Yu
  Cc: Andrii Nakryiko, Alexei Starovoitov, bpf, Daniel Borkmann,
	Yonghong Song, Alexei Starovoitov, Gabriele

On Mon, Jan 24, 2022 at 10:54 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This patch allows bpf iterator programs to use sleepable helpers by
> changing `bpf_iter_run_prog` to use the appropriate synchronization.
> With sleepable bpf iterator programs, we can no longer use
> `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>


>  kernel/bpf/bpf_iter.c | 20 +++++++++++++++-----
>  1 file changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index b7aef5b3416d..110029ede71e 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -5,6 +5,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
> +#include <linux/rcupdate_trace.h>
>
>  struct bpf_iter_target_info {
>         struct list_head list;
> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  {
>         int ret;
>
> -       rcu_read_lock();
> -       migrate_disable();
> -       ret = bpf_prog_run(prog, ctx);
> -       migrate_enable();
> -       rcu_read_unlock();
> +       if (prog->aux->sleepable) {
> +               rcu_read_lock_trace();
> +               migrate_disable();
> +               might_fault();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock_trace();
> +       } else {
> +               rcu_read_lock();
> +               migrate_disable();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock();
> +       }
>
>         /* bpf program can only return 0 or 1:
>          *  0 : okay
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper
  2022-01-24 22:18     ` Andrii Nakryiko
@ 2022-01-25  4:06       ` Alexei Starovoitov
  0 siblings, 0 replies; 62+ messages in thread
From: Alexei Starovoitov @ 2022-01-25  4:06 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Kenny Yu, Andrii Nakryiko, Alexei Starovoitov, bpf,
	Daniel Borkmann, Yonghong Song, Gabriele

On Mon, Jan 24, 2022 at 2:18 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Jan 24, 2022 at 10:54 AM Kenny Yu <kennyyu@fb.com> wrote:
> >
> > This adds a helper for bpf programs to read the memory of other
> > tasks.
> >
> > As an example use case at Meta, we are using a bpf task iterator program
> > and this new helper to print C++ async stack traces for all threads of
> > a given process.
> >
> > Signed-off-by: Kenny Yu <kennyyu@fb.com>
> > ---
>
> LGTM.
>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
>
> >  include/linux/bpf.h            |  1 +
> >  include/uapi/linux/bpf.h       | 11 +++++++++++
> >  kernel/bpf/helpers.c           | 34 ++++++++++++++++++++++++++++++++++
> >  kernel/trace/bpf_trace.c       |  2 ++
> >  tools/include/uapi/linux/bpf.h | 11 +++++++++++
> >  5 files changed, 59 insertions(+)
> >
>
> [...]
>
> > +       ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> > +       if (ret == size)
> > +               return 0;
> > +
> > +       memset(dst, 0, size);
> > +       /* Return -EFAULT for partial read */
> > +       return (ret < 0) ? ret : -EFAULT;
>
> nit: unnecessary ()

I fixed it while applying.
Also added a few unlikely().

Thanks everyone!

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and sleepable bpf iterator programs
  2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
                     ` (3 preceding siblings ...)
  2022-01-24 18:54   ` [PATCH v7 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
@ 2022-01-25  4:10   ` patchwork-bot+netdevbpf
  4 siblings, 0 replies; 62+ messages in thread
From: patchwork-bot+netdevbpf @ 2022-01-25  4:10 UTC (permalink / raw)
  To: Kenny Yu
  Cc: andrii, ast, bpf, daniel, yhs, alexei.starovoitov,
	andrii.nakryiko, phoenix1987

Hello:

This series was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Mon, 24 Jan 2022 10:53:59 -0800 you wrote:
> This patch series makes the following changes:
> * Adds a new bpf helper `bpf_copy_from_user_task` to read user space
>   memory from a different task.
> * Adds the ability to create sleepable bpf iterator programs.
> 
> As an example of how this will be used, at Meta we are using bpf task
> iterator programs and this new bpf helper to read C++ async stack traces of
> a running process for debugging C++ binaries in production.
> 
> [...]

Here is the summary with links:
  - [v7,bpf-next,1/4] bpf: Add support for bpf iterator programs to use sleepable helpers
    https://git.kernel.org/bpf/bpf-next/c/b77fb25dcb34
  - [v7,bpf-next,2/4] bpf: Add bpf_copy_from_user_task() helper
    https://git.kernel.org/bpf/bpf-next/c/376040e47334
  - [v7,bpf-next,3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs
    https://git.kernel.org/bpf/bpf-next/c/a8b77f7463a5
  - [v7,bpf-next,4/4] selftests/bpf: Add test for sleepable bpf iterator programs
    https://git.kernel.org/bpf/bpf-next/c/45105c2eb751

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 62+ messages in thread

end of thread, other threads:[~2022-01-25  6:58 UTC | newest]

Thread overview: 62+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog Kenny Yu
2022-01-14  2:50     ` Alexei Starovoitov
2022-01-14 23:15       ` Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-14  0:49   ` [PATCH v2 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
2022-01-14  2:39     ` Alexei Starovoitov
2022-01-14 23:14       ` Kenny Yu
2022-01-15  1:38         ` Andrii Nakryiko
2022-01-15  4:30           ` Kenny Yu
2022-01-15 16:27           ` Gabriele
2022-01-19 16:56             ` Kenny Yu
2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-19 20:16     ` Alexei Starovoitov
2022-01-19 22:51       ` Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-20  3:45     ` Yonghong Song
2022-01-20 17:11       ` Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-20 22:45     ` Andrii Nakryiko
2022-01-21  2:27       ` Alexei Starovoitov
2022-01-21 17:20         ` Andrii Nakryiko
2022-01-21 17:41           ` Kenny Yu
2022-01-21 17:47             ` Alexei Starovoitov
2022-01-21 18:30               ` Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-20 22:48     ` Andrii Nakryiko
2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
2022-01-21 19:53     ` Andrii Nakryiko
2022-01-21 20:04       ` Yonghong Song
2022-01-21 20:07         ` Andrii Nakryiko
2022-01-21 21:15           ` Yonghong Song
2022-01-24 17:30             ` Kenny Yu
2022-01-22  9:58       ` Gabriele
2022-01-22 10:08         ` Gabriele
2022-01-21 19:30   ` [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-21 19:54     ` Andrii Nakryiko
2022-01-21 19:30   ` [PATCH v6 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-21 19:55     ` Andrii Nakryiko
2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
2022-01-24 22:19     ` Andrii Nakryiko
2022-01-24 18:54   ` [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
2022-01-24 22:18     ` Andrii Nakryiko
2022-01-25  4:06       ` Alexei Starovoitov
2022-01-24 18:54   ` [PATCH v7 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-24 18:54   ` [PATCH v7 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
2022-01-25  4:10   ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).