bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 0/5] Fix BPF probe memory helpers
@ 2019-10-25 16:37 Daniel Borkmann
  2019-10-25 16:37 ` [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function Daniel Borkmann
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann

This set adds probe_read_{user,kernel}(), probe_read_str_{user,kernel}()
helpers, fixes probe_write_user() helper and selftests. For details please
see individual patches.

Thanks!

Daniel Borkmann (5):
  uaccess: Add non-pagefault user-space write function
  bpf: Make use of probe_user_write in probe write helper
  bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers
  bpf, samples: Use bpf_probe_read_user where appropriate
  bpf, testing: Add selftest to read/write sockaddr from user space

 include/linux/uaccess.h                       |  12 ++
 include/uapi/linux/bpf.h                      | 119 ++++++++++-----
 kernel/trace/bpf_trace.c                      | 139 ++++++++++++------
 mm/maccess.c                                  |  45 +++++-
 samples/bpf/map_perf_test_kern.c              |   4 +-
 samples/bpf/test_map_in_map_kern.c            |   4 +-
 samples/bpf/test_probe_write_user_kern.c      |   2 +-
 tools/include/uapi/linux/bpf.h                | 119 ++++++++++-----
 .../selftests/bpf/prog_tests/probe_user.c     |  80 ++++++++++
 .../selftests/bpf/progs/test_probe_user.c     |  33 +++++
 10 files changed, 426 insertions(+), 131 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c

-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function
  2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
@ 2019-10-25 16:37 ` Daniel Borkmann
  2019-10-25 21:53   ` Andrii Nakryiko
  2019-10-25 16:37 ` [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper Daniel Borkmann
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann

Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions")
missed to add probe write function, therefore factor out a probe_write_common()
helper with most logic of probe_kernel_write() except setting KERNEL_DS, and
add a new probe_user_write() helper so it can be used from BPF side.

Again, on some archs, the user address space and kernel address space can
co-exist and be overlapping, so in such case, setting KERNEL_DS would mean
that the given address is treated as being in kernel address space.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/uaccess.h | 12 +++++++++++
 mm/maccess.c            | 45 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e47d0522a1f4..86dcf2894672 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -337,6 +337,18 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
 extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
 extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
+/*
+ * probe_user_write(): safely attempt to write to a location in user space
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
+extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
+
 extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
 				     long count);
diff --git a/mm/maccess.c b/mm/maccess.c
index d065736f6b87..2d3c3d01064c 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
 	return ret ? -EFAULT : 0;
 }
 
+static __always_inline long
+probe_write_common(void __user *dst, const void *src, size_t size)
+{
+	long ret;
+
+	pagefault_disable();
+	ret = __copy_to_user_inatomic(dst, src, size);
+	pagefault_enable();
+
+	return ret ? -EFAULT : 0;
+}
+
 /**
  * probe_kernel_read(): safely attempt to read from a kernel-space location
  * @dst: pointer to the buffer that shall take the data
@@ -85,6 +97,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
+
 long __weak probe_kernel_write(void *dst, const void *src, size_t size)
     __attribute__((alias("__probe_kernel_write")));
 
@@ -94,15 +107,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 	mm_segment_t old_fs = get_fs();
 
 	set_fs(KERNEL_DS);
-	pagefault_disable();
-	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
-	pagefault_enable();
+	ret = probe_write_common((__force void __user *)dst, src, size);
 	set_fs(old_fs);
 
-	return ret ? -EFAULT : 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
 
+/**
+ * probe_user_write(): safely attempt to write to a user-space location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+
+long __weak probe_user_write(void __user *dst, const void *src, size_t size)
+    __attribute__((alias("__probe_user_write")));
+
+long __probe_user_write(void __user *dst, const void *src, size_t size)
+{
+	long ret = -EFAULT;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(USER_DS);
+	if (access_ok(dst, size))
+		ret = probe_write_common(dst, src, size);
+	set_fs(old_fs);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(probe_user_write);
 
 /**
  * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper
  2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
  2019-10-25 16:37 ` [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function Daniel Borkmann
@ 2019-10-25 16:37 ` Daniel Borkmann
  2019-10-25 21:59   ` Andrii Nakryiko
  2019-10-25 16:37 ` [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers Daniel Borkmann
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann

Convert the bpf_probe_write_user() helper to probe_user_write() such that
writes are not attempted under KERNEL_DS anymore which is buggy as kernel
and user space pointers can have overlapping addresses. Also, given we have
the access_ok() check inside probe_user_write(), the helper doesn't need
to do it twice.

Fixes: 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/trace/bpf_trace.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c3240898cc44..79919a26cd59 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -163,7 +163,7 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
 	   u32, size)
 {
 	/*
@@ -186,10 +186,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
 		return -EPERM;
 	if (unlikely(!nmi_uaccess_okay()))
 		return -EPERM;
-	if (!access_ok(unsafe_ptr, size))
-		return -EPERM;
 
-	return probe_kernel_write(unsafe_ptr, src, size);
+	return probe_user_write(unsafe_ptr, src, size);
 }
 
 static const struct bpf_func_proto bpf_probe_write_user_proto = {
-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers
  2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
  2019-10-25 16:37 ` [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function Daniel Borkmann
  2019-10-25 16:37 ` [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper Daniel Borkmann
@ 2019-10-25 16:37 ` Daniel Borkmann
  2019-10-25 22:08   ` Andrii Nakryiko
  2019-10-25 16:37 ` [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate Daniel Borkmann
  2019-10-25 16:37 ` [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space Daniel Borkmann
  4 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann

The current bpf_probe_read() and bpf_probe_read_str() helpers are broken
in that they assume they can be used for probing memory access for kernel
space addresses /as well as/ user space addresses.

However, plain use of probe_kernel_read() for both cases will attempt to
always access kernel space address space given access is performed under
KERNEL_DS and some archs in-fact have overlapping address spaces where a
kernel pointer and user pointer would have the /same/ address value and
therefore accessing application memory via bpf_probe_read{,_str}() would
read garbage values.

Lets fix BPF side by making use of recently added 3d7081822f7f ("uaccess:
Add non-pagefault user-space read functions"). Unfortunately, the only way
to fix this status quo is to add dedicated bpf_probe_read_{user,kernel}()
and bpf_probe_read_str_{user,kernel}() helpers. The bpf_probe_read{,_str}()
helpers are aliased to the *_kernel() variants to retain their current
behavior; for API consistency and ease of use the latter have been added
so that it is immediately *obvious* which address space the memory is being
probed on (user,kernel). The two *_user() variants attempt the access under
USER_DS set.

Fixes: a5e8c07059d0 ("bpf: add bpf_probe_read_str helper")
Fixes: 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h       | 119 ++++++++++++++++++++---------
 kernel/trace/bpf_trace.c       | 133 ++++++++++++++++++++++-----------
 tools/include/uapi/linux/bpf.h | 119 ++++++++++++++++++++---------
 3 files changed, 253 insertions(+), 118 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4af8b0819a32..b8ffb419df51 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -564,7 +564,11 @@ union bpf_attr {
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
- * 		address *src* and store the data in *dst*.
+ * 		kernel space address *src* and store the data in *dst*.
+ *
+ * 		This helper is an alias to bpf_probe_read_kernel().
+ *
+ * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() instead.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -1428,43 +1432,14 @@ union bpf_attr {
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
  * 	Description
- * 		Copy a NUL terminated string from an unsafe address
- * 		*unsafe_ptr* to *dst*. The *size* should include the
- * 		terminating NUL byte. In case the string length is smaller than
- * 		*size*, the target is not padded with further NUL bytes. If the
- * 		string length is larger than *size*, just *size*-1 bytes are
- * 		copied and the last byte is set to NUL.
- *
- * 		On success, the length of the copied string is returned. This
- * 		makes this helper useful in tracing programs for reading
- * 		strings, and more importantly to get its length at runtime. See
- * 		the following snippet:
- *
- * 		::
- *
- * 			SEC("kprobe/sys_open")
- * 			void bpf_sys_open(struct pt_regs *ctx)
- * 			{
- * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
- * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
- * 				                             ctx->di);
- *
- * 				// Consume buf, for example push it to
- * 				// userspace via bpf_perf_event_output(); we
- * 				// can use res (the string length) as event
- * 				// size, after checking its boundaries.
- * 			}
+ * 		Copy a NUL terminated string from an unsafe kernel address
+ * 		*unsafe_ptr* to *dst*. See bpf_probe_read_str_kernel() for
+ * 		more details.
  *
- * 		In comparison, using **bpf_probe_read()** helper here instead
- * 		to read the string would require to estimate the length at
- * 		compile time, and would often result in copying more memory
- * 		than necessary.
+ * 		This helper is an alias to bpf_probe_read_str_kernel().
  *
- * 		Another useful use case is when parsing individual process
- * 		arguments or individual environment variables navigating
- * 		*current*\ **->mm->arg_start** and *current*\
- * 		**->mm->env_start**: using this helper and the return value,
- * 		one can quickly iterate at the right offset of the memory area.
+ * 		Generally, use bpf_probe_read_str_user() or bpf_probe_read_str_kernel()
+ * 		instead.
  * 	Return
  * 		On success, the strictly positive length of the string,
  * 		including the trailing NUL character. On error, a negative
@@ -2775,6 +2750,72 @@ union bpf_attr {
  * 		restricted to raw_tracepoint bpf programs.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from user space address
+ * 		*src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from kernel space address
+ * 		*src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str_user(void *dst, int size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe user address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_str_user(buf, sizeof(buf),
+ * 				                                  ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		instead to read the string would require to estimate the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * int bpf_probe_read_str_kernel(void *dst, int size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * 		to *dst*. Same semantics as with bpf_probe_read_str_user() apply.
+ * 	Return
+ * 		On success, the strictly positive length of the string,	including
+ * 		the trailing NUL character. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2929,11 @@ union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_str_user),	\
+	FN(probe_read_str_kernel),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 79919a26cd59..ff001b766799 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -138,12 +138,52 @@ static const struct bpf_func_proto bpf_override_return_proto = {
 };
 #endif
 
-BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
 {
-	int ret;
+	int ret = probe_user_read(dst, unsafe_ptr, size);
 
-	ret = security_locked_down(LOCKDOWN_BPF_READ);
-	if (ret < 0)
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_proto = {
+	.func		= bpf_probe_read_user,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_str_user, void *, dst, u32, size,
+	   const void __user *, unsafe_ptr)
+{
+	int ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
+
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_user_proto = {
+	.func		= bpf_probe_read_str_user,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+	if (unlikely(ret < 0))
 		goto out;
 
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
@@ -154,8 +194,42 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_proto = {
-	.func		= bpf_probe_read,
+static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+	.func		= bpf_probe_read_kernel,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_str_kernel, void *, dst, u32, size,
+	   const void *, unsafe_ptr)
+{
+	int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+	if (unlikely(ret < 0))
+		goto out;
+
+	/*
+	 * The strncpy_from_unsafe() call will likely not fill the entire
+	 * buffer, but that's okay in this circumstance as we're probing
+	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
+	 * as well probe the stack. Thus, memory is explicitly cleared
+	 * only in error case, so that improper users ignoring return
+	 * code altogether don't copy garbage; otherwise length of string
+	 * is returned that can be used for bpf_perf_event_output() et al.
+	 */
+	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+out:
+		memset(dst, 0, size);
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_kernel_proto = {
+	.func		= bpf_probe_read_str_kernel,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
@@ -583,41 +657,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
-	   const void *, unsafe_ptr)
-{
-	int ret;
-
-	ret = security_locked_down(LOCKDOWN_BPF_READ);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * The strncpy_from_unsafe() call will likely not fill the entire
-	 * buffer, but that's okay in this circumstance as we're probing
-	 * arbitrary memory anyway similar to bpf_probe_read() and might
-	 * as well probe the stack. Thus, memory is explicitly cleared
-	 * only in error case, so that improper users ignoring return
-	 * code altogether don't copy garbage; otherwise length of string
-	 * is returned that can be used for bpf_perf_event_output() et al.
-	 */
-	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
-	if (unlikely(ret < 0))
-out:
-		memset(dst, 0, size);
-
-	return ret;
-}
-
-static const struct bpf_func_proto bpf_probe_read_str_proto = {
-	.func		= bpf_probe_read_str,
-	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
-	.arg3_type	= ARG_ANYTHING,
-};
-
 struct send_signal_irq_work {
 	struct irq_work irq_work;
 	struct task_struct *task;
@@ -697,8 +736,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_pop_elem_proto;
 	case BPF_FUNC_map_peek_elem:
 		return &bpf_map_peek_elem_proto;
-	case BPF_FUNC_probe_read:
-		return &bpf_probe_read_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_tail_call:
@@ -725,8 +762,16 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_current_task_under_cgroup_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_probe_read_user:
+		return &bpf_probe_read_user_proto;
+	case BPF_FUNC_probe_read_kernel:
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_kernel_proto;
+	case BPF_FUNC_probe_read_str_user:
+		return &bpf_probe_read_str_user_proto;
+	case BPF_FUNC_probe_read_str_kernel:
 	case BPF_FUNC_probe_read_str:
-		return &bpf_probe_read_str_proto;
+		return &bpf_probe_read_str_kernel_proto;
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4af8b0819a32..b8ffb419df51 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -564,7 +564,11 @@ union bpf_attr {
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
- * 		address *src* and store the data in *dst*.
+ * 		kernel space address *src* and store the data in *dst*.
+ *
+ * 		This helper is an alias to bpf_probe_read_kernel().
+ *
+ * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() instead.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -1428,43 +1432,14 @@ union bpf_attr {
  *
  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
  * 	Description
- * 		Copy a NUL terminated string from an unsafe address
- * 		*unsafe_ptr* to *dst*. The *size* should include the
- * 		terminating NUL byte. In case the string length is smaller than
- * 		*size*, the target is not padded with further NUL bytes. If the
- * 		string length is larger than *size*, just *size*-1 bytes are
- * 		copied and the last byte is set to NUL.
- *
- * 		On success, the length of the copied string is returned. This
- * 		makes this helper useful in tracing programs for reading
- * 		strings, and more importantly to get its length at runtime. See
- * 		the following snippet:
- *
- * 		::
- *
- * 			SEC("kprobe/sys_open")
- * 			void bpf_sys_open(struct pt_regs *ctx)
- * 			{
- * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
- * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
- * 				                             ctx->di);
- *
- * 				// Consume buf, for example push it to
- * 				// userspace via bpf_perf_event_output(); we
- * 				// can use res (the string length) as event
- * 				// size, after checking its boundaries.
- * 			}
+ * 		Copy a NUL terminated string from an unsafe kernel address
+ * 		*unsafe_ptr* to *dst*. See bpf_probe_read_str_kernel() for
+ * 		more details.
  *
- * 		In comparison, using **bpf_probe_read()** helper here instead
- * 		to read the string would require to estimate the length at
- * 		compile time, and would often result in copying more memory
- * 		than necessary.
+ * 		This helper is an alias to bpf_probe_read_str_kernel().
  *
- * 		Another useful use case is when parsing individual process
- * 		arguments or individual environment variables navigating
- * 		*current*\ **->mm->arg_start** and *current*\
- * 		**->mm->env_start**: using this helper and the return value,
- * 		one can quickly iterate at the right offset of the memory area.
+ * 		Generally, use bpf_probe_read_str_user() or bpf_probe_read_str_kernel()
+ * 		instead.
  * 	Return
  * 		On success, the strictly positive length of the string,
  * 		including the trailing NUL character. On error, a negative
@@ -2775,6 +2750,72 @@ union bpf_attr {
  * 		restricted to raw_tracepoint bpf programs.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from user space address
+ * 		*src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *src)
+ * 	Description
+ * 		Safely attempt to read *size* bytes from kernel space address
+ * 		*src* and store the data in *dst*.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str_user(void *dst, int size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe user address
+ * 		*unsafe_ptr* to *dst*. The *size* should include the
+ * 		terminating NUL byte. In case the string length is smaller than
+ * 		*size*, the target is not padded with further NUL bytes. If the
+ * 		string length is larger than *size*, just *size*-1 bytes are
+ * 		copied and the last byte is set to NUL.
+ *
+ * 		On success, the length of the copied string is returned. This
+ * 		makes this helper useful in tracing programs for reading
+ * 		strings, and more importantly to get its length at runtime. See
+ * 		the following snippet:
+ *
+ * 		::
+ *
+ * 			SEC("kprobe/sys_open")
+ * 			void bpf_sys_open(struct pt_regs *ctx)
+ * 			{
+ * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
+ * 			        int res = bpf_probe_read_str_user(buf, sizeof(buf),
+ * 				                                  ctx->di);
+ *
+ * 				// Consume buf, for example push it to
+ * 				// userspace via bpf_perf_event_output(); we
+ * 				// can use res (the string length) as event
+ * 				// size, after checking its boundaries.
+ * 			}
+ *
+ * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		instead to read the string would require to estimate the length
+ * 		at compile time, and would often result in copying more memory
+ * 		than necessary.
+ *
+ * 		Another useful use case is when parsing individual process
+ * 		arguments or individual environment variables navigating
+ * 		*current*\ **->mm->arg_start** and *current*\
+ * 		**->mm->env_start**: using this helper and the return value,
+ * 		one can quickly iterate at the right offset of the memory area.
+ * 	Return
+ * 		On success, the strictly positive length of the string,
+ * 		including the trailing NUL character. On error, a negative
+ * 		value.
+ *
+ * int bpf_probe_read_str_kernel(void *dst, int size, const void *unsafe_ptr)
+ * 	Description
+ * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * 		to *dst*. Same semantics as with bpf_probe_read_str_user() apply.
+ * 	Return
+ * 		On success, the strictly positive length of the string,	including
+ * 		the trailing NUL character. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2888,7 +2929,11 @@ union bpf_attr {
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
 	FN(tcp_gen_syncookie),		\
-	FN(skb_output),
+	FN(skb_output),			\
+	FN(probe_read_user),		\
+	FN(probe_read_kernel),		\
+	FN(probe_read_str_user),	\
+	FN(probe_read_str_kernel),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate
  2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
                   ` (2 preceding siblings ...)
  2019-10-25 16:37 ` [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers Daniel Borkmann
@ 2019-10-25 16:37 ` Daniel Borkmann
  2019-10-25 22:08   ` Andrii Nakryiko
  2019-10-25 16:37 ` [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space Daniel Borkmann
  4 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann

Use bpf_probe_read_user() helper instead of bpf_probe_read() for samples that
attach to kprobes probing on user addresses.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/map_perf_test_kern.c         | 4 ++--
 samples/bpf/test_map_in_map_kern.c       | 4 ++--
 samples/bpf/test_probe_write_user_kern.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 5c11aefbc489..281bcdaee58e 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -181,8 +181,8 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
 	if (addrlen != sizeof(*in6))
 		return 0;
 
-	ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
-			     &in6->sin6_addr);
+	ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
+				  &in6->sin6_addr);
 	if (ret)
 		goto done;
 
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
index 4f80cbe74c72..32ee752f19df 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
 	if (addrlen != sizeof(*in6))
 		return 0;
 
-	ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+	ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
 	if (ret) {
 		inline_ret = ret;
 		goto done;
@@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
 
 	test_case = dst6[7];
 
-	ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+	ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
 	if (ret) {
 		inline_ret = ret;
 		goto done;
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index a543358218e6..b7c48f37132c 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
 	if (sockaddr_len > sizeof(orig_addr))
 		return 0;
 
-	if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+	if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
 		return 0;
 
 	mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space
  2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
                   ` (3 preceding siblings ...)
  2019-10-25 16:37 ` [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate Daniel Borkmann
@ 2019-10-25 16:37 ` Daniel Borkmann
  2019-10-25 22:14   ` Andrii Nakryiko
  2019-10-25 23:36   ` Andrii Nakryiko
  4 siblings, 2 replies; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 16:37 UTC (permalink / raw)
  To: ast; +Cc: bpf, netdev, Daniel Borkmann, Ilya Leoshkevich

Tested on x86-64 and Ilya was also kind enough to give it a spin on
s390x, both passing with probe_user:OK there. The test is using the
newly added bpf_probe_read_user() to dump sockaddr from connect call
into BPF map and overrides the user buffer via bpf_probe_write_user():

  # ./test_progs
  [...]
  #17 pkt_md_access:OK
  #18 probe_user:OK
  #19 prog_run_xattr:OK
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 .../selftests/bpf/prog_tests/probe_user.c     | 80 +++++++++++++++++++
 .../selftests/bpf/progs/test_probe_user.c     | 33 ++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c

diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
new file mode 100644
index 000000000000..e37761bda8a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_probe_user(void)
+{
+#define kprobe_name "__sys_connect"
+	const char *prog_name = "kprobe/" kprobe_name;
+	const char *obj_file = "./test_probe_user.o";
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+		.relaxed_maps = true,
+	);
+	int err, results_map_fd, sock_fd, duration;
+	struct sockaddr curr, orig, tmp;
+	struct sockaddr_in *in = (struct sockaddr_in *)&curr;
+	struct bpf_link *kprobe_link = NULL;
+	struct bpf_program *kprobe_prog;
+	struct bpf_object *obj;
+	static const int zero = 0;
+
+	obj = bpf_object__open_file(obj_file, &opts);
+	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+		return;
+
+	kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!kprobe_prog, "find_probe",
+		  "prog '%s' not found\n", prog_name))
+		goto cleanup;
+
+	err = bpf_object__load(obj);
+	if (CHECK(err, "obj_load", "err %d\n", err))
+		goto cleanup;
+
+	results_map_fd = bpf_find_map(__func__, obj, "results_map");
+	if (CHECK(results_map_fd < 0, "find_results_map",
+		  "err %d\n", results_map_fd))
+		goto cleanup;
+
+	kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
+						 kprobe_name);
+	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
+		  "err %ld\n", PTR_ERR(kprobe_link))) {
+		kprobe_link = NULL;
+		goto cleanup;
+	}
+
+	memset(&curr, 0, sizeof(curr));
+	in->sin_family = AF_INET;
+	in->sin_port = htons(5555);
+	in->sin_addr.s_addr = inet_addr("255.255.255.255");
+	memcpy(&orig, &curr, sizeof(curr));
+
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd))
+		goto cleanup;
+
+	connect(sock_fd, &curr, sizeof(curr));
+	close(sock_fd);
+
+	err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp);
+	if (CHECK(err, "get_kprobe_res",
+		  "failed to get kprobe res: %d\n", err))
+		goto cleanup;
+
+	in = (struct sockaddr_in *)&tmp;
+	if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res",
+		  "wrong kprobe res from probe read: %s:%u\n",
+		  inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+		goto cleanup;
+
+	memset(&tmp, 0xab, sizeof(tmp));
+
+	in = (struct sockaddr_in *)&curr;
+	if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res",
+		  "wrong kprobe res from probe write: %s:%u\n",
+		  inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
+		goto cleanup;
+cleanup:
+	bpf_link__destroy(kprobe_link);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
new file mode 100644
index 000000000000..a9b8a0bde0b9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+
+#include <netinet/in.h>
+
+#include "bpf_helpers.h"
+#include "bpf_tracing.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct sockaddr_in);
+} results_map SEC(".maps");
+
+SEC("kprobe/__sys_connect")
+int handle_sys_connect(struct pt_regs *ctx)
+{
+	void *ptr = (void *)PT_REGS_PARM2(ctx);
+	struct sockaddr_in old, new;
+	const int zero = 0;
+
+	bpf_probe_read_user(&old, sizeof(old), ptr);
+	bpf_map_update_elem(&results_map, &zero, &old, 0);
+	__builtin_memset(&new, 0xab, sizeof(new));
+	bpf_probe_write_user(ptr, &new, sizeof(new));
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.21.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function
  2019-10-25 16:37 ` [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function Daniel Borkmann
@ 2019-10-25 21:53   ` Andrii Nakryiko
  2019-10-25 22:15     ` Daniel Borkmann
  0 siblings, 1 reply; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 21:53 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions")
> missed to add probe write function, therefore factor out a probe_write_common()
> helper with most logic of probe_kernel_write() except setting KERNEL_DS, and
> add a new probe_user_write() helper so it can be used from BPF side.
>
> Again, on some archs, the user address space and kernel address space can
> co-exist and be overlapping, so in such case, setting KERNEL_DS would mean
> that the given address is treated as being in kernel address space.
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---

LGTM. See an EFAULT comment below, though.

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  include/linux/uaccess.h | 12 +++++++++++
>  mm/maccess.c            | 45 +++++++++++++++++++++++++++++++++++++----
>  2 files changed, 53 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index e47d0522a1f4..86dcf2894672 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -337,6 +337,18 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
>  extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
>  extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
>
> +/*
> + * probe_user_write(): safely attempt to write to a location in user space
> + * @dst: address to write to
> + * @src: pointer to the data that shall be written
> + * @size: size of the data chunk
> + *
> + * Safely write to address @dst from the buffer at @src.  If a kernel fault
> + * happens, handle that and return -EFAULT.
> + */
> +extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
> +extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
> +
>  extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
>  extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
>                                      long count);
> diff --git a/mm/maccess.c b/mm/maccess.c
> index d065736f6b87..2d3c3d01064c 100644
> --- a/mm/maccess.c

[...]

>
> +/**
> + * probe_user_write(): safely attempt to write to a user-space location
> + * @dst: address to write to
> + * @src: pointer to the data that shall be written
> + * @size: size of the data chunk
> + *
> + * Safely write to address @dst from the buffer at @src.  If a kernel fault
> + * happens, handle that and return -EFAULT.
> + */
> +
> +long __weak probe_user_write(void __user *dst, const void *src, size_t size)
> +    __attribute__((alias("__probe_user_write")));

curious, why is there this dance of probe_user_write alias to
__probe_user_write (and for other pairs of functions as well)?

> +
> +long __probe_user_write(void __user *dst, const void *src, size_t size)
> +{
> +       long ret = -EFAULT;

This initialization is not necessary, is it? Similarly in
__probe_user_read higher in this file.

> +       mm_segment_t old_fs = get_fs();
> +
> +       set_fs(USER_DS);
> +       if (access_ok(dst, size))
> +               ret = probe_write_common(dst, src, size);
> +       set_fs(old_fs);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(probe_user_write);
>
>  /**
>   * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
> --
> 2.21.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper
  2019-10-25 16:37 ` [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper Daniel Borkmann
@ 2019-10-25 21:59   ` Andrii Nakryiko
  0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 21:59 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Convert the bpf_probe_write_user() helper to probe_user_write() such that
> writes are not attempted under KERNEL_DS anymore which is buggy as kernel
> and user space pointers can have overlapping addresses. Also, given we have
> the access_ok() check inside probe_user_write(), the helper doesn't need
> to do it twice.
>
> Fixes: 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers")
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---

Acked-by: Andrii Nakryiko <andriin@fb.com>

[...]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers
  2019-10-25 16:37 ` [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers Daniel Borkmann
@ 2019-10-25 22:08   ` Andrii Nakryiko
  2019-10-25 22:20     ` Daniel Borkmann
  0 siblings, 1 reply; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 22:08 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> The current bpf_probe_read() and bpf_probe_read_str() helpers are broken
> in that they assume they can be used for probing memory access for kernel
> space addresses /as well as/ user space addresses.
>
> However, plain use of probe_kernel_read() for both cases will attempt to
> always access kernel space address space given access is performed under
> KERNEL_DS and some archs in-fact have overlapping address spaces where a
> kernel pointer and user pointer would have the /same/ address value and
> therefore accessing application memory via bpf_probe_read{,_str}() would
> read garbage values.
>
> Lets fix BPF side by making use of recently added 3d7081822f7f ("uaccess:
> Add non-pagefault user-space read functions"). Unfortunately, the only way
> to fix this status quo is to add dedicated bpf_probe_read_{user,kernel}()
> and bpf_probe_read_str_{user,kernel}() helpers. The bpf_probe_read{,_str}()
> helpers are aliased to the *_kernel() variants to retain their current
> behavior; for API consistency and ease of use the latter have been added
> so that it is immediately *obvious* which address space the memory is being
> probed on (user,kernel). The two *_user() variants attempt the access under
> USER_DS set.
>
> Fixes: a5e8c07059d0 ("bpf: add bpf_probe_read_str helper")
> Fixes: 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes")
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  include/uapi/linux/bpf.h       | 119 ++++++++++++++++++++---------
>  kernel/trace/bpf_trace.c       | 133 ++++++++++++++++++++++-----------
>  tools/include/uapi/linux/bpf.h | 119 ++++++++++++++++++++---------
>  3 files changed, 253 insertions(+), 118 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4af8b0819a32..b8ffb419df51 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -564,7 +564,11 @@ union bpf_attr {
>   * int bpf_probe_read(void *dst, u32 size, const void *src)
>   *     Description
>   *             For tracing programs, safely attempt to read *size* bytes from
> - *             address *src* and store the data in *dst*.
> + *             kernel space address *src* and store the data in *dst*.
> + *
> + *             This helper is an alias to bpf_probe_read_kernel().
> + *
> + *             Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() instead.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
>   *
> @@ -1428,43 +1432,14 @@ union bpf_attr {
>   *
>   * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)

seems like an approriate time to standardize terminology. Should it be
unsafe_ptr like here, or src like in bpf_probe_read description?

>   *     Description
> - *             Copy a NUL terminated string from an unsafe address
> - *             *unsafe_ptr* to *dst*. The *size* should include the
> - *             terminating NUL byte. In case the string length is smaller than
> - *             *size*, the target is not padded with further NUL bytes. If the
> - *             string length is larger than *size*, just *size*-1 bytes are
> - *             copied and the last byte is set to NUL.
> - *

[...]

>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -2888,7 +2929,11 @@ union bpf_attr {
>         FN(sk_storage_delete),          \
>         FN(send_signal),                \
>         FN(tcp_gen_syncookie),          \
> -       FN(skb_output),
> +       FN(skb_output),                 \
> +       FN(probe_read_user),            \
> +       FN(probe_read_kernel),          \
> +       FN(probe_read_str_user),        \
> +       FN(probe_read_str_kernel),

naming is subjective, but I'd go with probe_{user,kernel}_read[_str]
scheme, but given bpf_probe_write_user and desire to stay consistent,
I'd still stick to slightly different probe_read_{user,kernel}[_str]
scheme.

>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 79919a26cd59..ff001b766799 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -138,12 +138,52 @@ static const struct bpf_func_proto bpf_override_return_proto = {
>  };
>  #endif
>

[...]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate
  2019-10-25 16:37 ` [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate Daniel Borkmann
@ 2019-10-25 22:08   ` Andrii Nakryiko
  0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 22:08 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 1:46 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Use bpf_probe_read_user() helper instead of bpf_probe_read() for samples that
> attach to kprobes probing on user addresses.
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  samples/bpf/map_perf_test_kern.c         | 4 ++--
>  samples/bpf/test_map_in_map_kern.c       | 4 ++--
>  samples/bpf/test_probe_write_user_kern.c | 2 +-
>  3 files changed, 5 insertions(+), 5 deletions(-)
>

[...]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space
  2019-10-25 16:37 ` [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space Daniel Borkmann
@ 2019-10-25 22:14   ` Andrii Nakryiko
  2019-10-25 22:38     ` Daniel Borkmann
  2019-10-25 23:36   ` Andrii Nakryiko
  1 sibling, 1 reply; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 22:14 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking, Ilya Leoshkevich

On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Tested on x86-64 and Ilya was also kind enough to give it a spin on
> s390x, both passing with probe_user:OK there. The test is using the
> newly added bpf_probe_read_user() to dump sockaddr from connect call
> into BPF map and overrides the user buffer via bpf_probe_write_user():
>
>   # ./test_progs
>   [...]
>   #17 pkt_md_access:OK
>   #18 probe_user:OK
>   #19 prog_run_xattr:OK
>   [...]
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  .../selftests/bpf/prog_tests/probe_user.c     | 80 +++++++++++++++++++
>  .../selftests/bpf/progs/test_probe_user.c     | 33 ++++++++
>  2 files changed, 113 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> new file mode 100644
> index 000000000000..e37761bda8a4
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +
> +void test_probe_user(void)
> +{
> +#define kprobe_name "__sys_connect"
> +       const char *prog_name = "kprobe/" kprobe_name;
> +       const char *obj_file = "./test_probe_user.o";
> +       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
> +               .relaxed_maps = true,

do we need relaxed_maps in this case?

> +       );
> +       int err, results_map_fd, sock_fd, duration;
> +       struct sockaddr curr, orig, tmp;
> +       struct sockaddr_in *in = (struct sockaddr_in *)&curr;
> +       struct bpf_link *kprobe_link = NULL;
> +       struct bpf_program *kprobe_prog;
> +       struct bpf_object *obj;
> +       static const int zero = 0;
> +

[...]

> +
> +struct {
> +       __uint(type, BPF_MAP_TYPE_ARRAY);
> +       __uint(max_entries, 1);
> +       __type(key, int);
> +       __type(value, struct sockaddr_in);
> +} results_map SEC(".maps");
> +
> +SEC("kprobe/__sys_connect")
> +int handle_sys_connect(struct pt_regs *ctx)
> +{
> +       void *ptr = (void *)PT_REGS_PARM2(ctx);
> +       struct sockaddr_in old, new;
> +       const int zero = 0;
> +
> +       bpf_probe_read_user(&old, sizeof(old), ptr);
> +       bpf_map_update_elem(&results_map, &zero, &old, 0);

could have used global data and read directly into it :)

> +       __builtin_memset(&new, 0xab, sizeof(new));
> +       bpf_probe_write_user(ptr, &new, sizeof(new));
> +
> +       return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.21.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function
  2019-10-25 21:53   ` Andrii Nakryiko
@ 2019-10-25 22:15     ` Daniel Borkmann
  2019-10-25 22:43       ` Andrii Nakryiko
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 22:15 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 02:53:07PM -0700, Andrii Nakryiko wrote:
> On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions")
> > missed to add probe write function, therefore factor out a probe_write_common()
> > helper with most logic of probe_kernel_write() except setting KERNEL_DS, and
> > add a new probe_user_write() helper so it can be used from BPF side.
> >
> > Again, on some archs, the user address space and kernel address space can
> > co-exist and be overlapping, so in such case, setting KERNEL_DS would mean
> > that the given address is treated as being in kernel address space.
> >
> > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> 
> LGTM. See an EFAULT comment below, though.
> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
> [...]
> 
> > +/**
> > + * probe_user_write(): safely attempt to write to a user-space location
> > + * @dst: address to write to
> > + * @src: pointer to the data that shall be written
> > + * @size: size of the data chunk
> > + *
> > + * Safely write to address @dst from the buffer at @src.  If a kernel fault
> > + * happens, handle that and return -EFAULT.
> > + */
> > +
> > +long __weak probe_user_write(void __user *dst, const void *src, size_t size)
> > +    __attribute__((alias("__probe_user_write")));
> 
> curious, why is there this dance of probe_user_write alias to
> __probe_user_write (and for other pairs of functions as well)?

Seems done by convention to allow archs to override the __weak marked
functions in order to add additional checks and being able to then call
into the __ prefixed variant.

> > +long __probe_user_write(void __user *dst, const void *src, size_t size)
> > +{
> > +       long ret = -EFAULT;
> 
> This initialization is not necessary, is it? Similarly in
> __probe_user_read higher in this file.

Not entirely sure what you mean. In both there's access_ok() check before
invoking the common helper.

> > +       mm_segment_t old_fs = get_fs();
> > +
> > +       set_fs(USER_DS);
> > +       if (access_ok(dst, size))
> > +               ret = probe_write_common(dst, src, size);
> > +       set_fs(old_fs);
> > +
> > +       return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(probe_user_write);
> >
> >  /**
> >   * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
> > --
> > 2.21.0
> >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers
  2019-10-25 22:08   ` Andrii Nakryiko
@ 2019-10-25 22:20     ` Daniel Borkmann
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 22:20 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 03:08:16PM -0700, Andrii Nakryiko wrote:
> On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > The current bpf_probe_read() and bpf_probe_read_str() helpers are broken
> > in that they assume they can be used for probing memory access for kernel
> > space addresses /as well as/ user space addresses.
> >
> > However, plain use of probe_kernel_read() for both cases will attempt to
> > always access kernel space address space given access is performed under
> > KERNEL_DS and some archs in-fact have overlapping address spaces where a
> > kernel pointer and user pointer would have the /same/ address value and
> > therefore accessing application memory via bpf_probe_read{,_str}() would
> > read garbage values.
> >
> > Lets fix BPF side by making use of recently added 3d7081822f7f ("uaccess:
> > Add non-pagefault user-space read functions"). Unfortunately, the only way
> > to fix this status quo is to add dedicated bpf_probe_read_{user,kernel}()
> > and bpf_probe_read_str_{user,kernel}() helpers. The bpf_probe_read{,_str}()
> > helpers are aliased to the *_kernel() variants to retain their current
> > behavior; for API consistency and ease of use the latter have been added
> > so that it is immediately *obvious* which address space the memory is being
> > probed on (user,kernel). The two *_user() variants attempt the access under
> > USER_DS set.
> >
> > Fixes: a5e8c07059d0 ("bpf: add bpf_probe_read_str helper")
> > Fixes: 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes")
> > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> > ---
> >  include/uapi/linux/bpf.h       | 119 ++++++++++++++++++++---------
> >  kernel/trace/bpf_trace.c       | 133 ++++++++++++++++++++++-----------
> >  tools/include/uapi/linux/bpf.h | 119 ++++++++++++++++++++---------
> >  3 files changed, 253 insertions(+), 118 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4af8b0819a32..b8ffb419df51 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -564,7 +564,11 @@ union bpf_attr {
> >   * int bpf_probe_read(void *dst, u32 size, const void *src)
> >   *     Description
> >   *             For tracing programs, safely attempt to read *size* bytes from
> > - *             address *src* and store the data in *dst*.
> > + *             kernel space address *src* and store the data in *dst*.
> > + *
> > + *             This helper is an alias to bpf_probe_read_kernel().
> > + *
> > + *             Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() instead.
> >   *     Return
> >   *             0 on success, or a negative error in case of failure.
> >   *
> > @@ -1428,43 +1432,14 @@ union bpf_attr {
> >   *
> >   * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
> 
> seems like an approriate time to standardize terminology. Should it be
> unsafe_ptr like here, or src like in bpf_probe_read description?

Makes sense, I'll go for unsafe_ptr in v2 as it feels more descriptive.

> >   *     Description
> > - *             Copy a NUL terminated string from an unsafe address
> > - *             *unsafe_ptr* to *dst*. The *size* should include the
> > - *             terminating NUL byte. In case the string length is smaller than
> > - *             *size*, the target is not padded with further NUL bytes. If the
> > - *             string length is larger than *size*, just *size*-1 bytes are
> > - *             copied and the last byte is set to NUL.
> > - *
> 
> [...]
> 
> >  #define __BPF_FUNC_MAPPER(FN)          \
> >         FN(unspec),                     \
> > @@ -2888,7 +2929,11 @@ union bpf_attr {
> >         FN(sk_storage_delete),          \
> >         FN(send_signal),                \
> >         FN(tcp_gen_syncookie),          \
> > -       FN(skb_output),
> > +       FN(skb_output),                 \
> > +       FN(probe_read_user),            \
> > +       FN(probe_read_kernel),          \
> > +       FN(probe_read_str_user),        \
> > +       FN(probe_read_str_kernel),
> 
> naming is subjective, but I'd go with probe_{user,kernel}_read[_str]
> scheme, but given bpf_probe_write_user and desire to stay consistent,
> I'd still stick to slightly different probe_read_{user,kernel}[_str]
> scheme.

Yeah, I'm fine with changing into probe_read_{user,kernel}[_str], and
it's still in line with bpf_probe_{read,write}_{user,kernel} helpers.

> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> >   * function eBPF program intends to call
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 79919a26cd59..ff001b766799 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -138,12 +138,52 @@ static const struct bpf_func_proto bpf_override_return_proto = {
> >  };
> >  #endif
> >
> 
> [...]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space
  2019-10-25 22:14   ` Andrii Nakryiko
@ 2019-10-25 22:38     ` Daniel Borkmann
  2019-10-25 23:35       ` Andrii Nakryiko
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Borkmann @ 2019-10-25 22:38 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Alexei Starovoitov, bpf, Networking, Ilya Leoshkevich

On Fri, Oct 25, 2019 at 03:14:49PM -0700, Andrii Nakryiko wrote:
> On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > Tested on x86-64 and Ilya was also kind enough to give it a spin on
> > s390x, both passing with probe_user:OK there. The test is using the
> > newly added bpf_probe_read_user() to dump sockaddr from connect call
> > into BPF map and overrides the user buffer via bpf_probe_write_user():
> >
> >   # ./test_progs
> >   [...]
> >   #17 pkt_md_access:OK
> >   #18 probe_user:OK
> >   #19 prog_run_xattr:OK
> >   [...]
> >
> > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> > Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >  .../selftests/bpf/prog_tests/probe_user.c     | 80 +++++++++++++++++++
> >  .../selftests/bpf/progs/test_probe_user.c     | 33 ++++++++
> >  2 files changed, 113 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> > new file mode 100644
> > index 000000000000..e37761bda8a4
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> > @@ -0,0 +1,80 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <test_progs.h>
> > +
> > +void test_probe_user(void)
> > +{
> > +#define kprobe_name "__sys_connect"
> > +       const char *prog_name = "kprobe/" kprobe_name;
> > +       const char *obj_file = "./test_probe_user.o";
> > +       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
> > +               .relaxed_maps = true,
> 
> do we need relaxed_maps in this case?

Ah yeap, I'll remove. Test runs fine w/o it. Any particular reason you added it back in
928ca75e59d7 ("selftests/bpf: switch tests to new bpf_object__open_{file, mem}() APIs")?

> > +       );
> > +       int err, results_map_fd, sock_fd, duration;
> > +       struct sockaddr curr, orig, tmp;
> > +       struct sockaddr_in *in = (struct sockaddr_in *)&curr;
> > +       struct bpf_link *kprobe_link = NULL;
> > +       struct bpf_program *kprobe_prog;
> > +       struct bpf_object *obj;
> > +       static const int zero = 0;
> > +
> 
> [...]
> 
> > +
> > +struct {
> > +       __uint(type, BPF_MAP_TYPE_ARRAY);
> > +       __uint(max_entries, 1);
> > +       __type(key, int);
> > +       __type(value, struct sockaddr_in);
> > +} results_map SEC(".maps");
> > +
> > +SEC("kprobe/__sys_connect")
> > +int handle_sys_connect(struct pt_regs *ctx)
> > +{
> > +       void *ptr = (void *)PT_REGS_PARM2(ctx);
> > +       struct sockaddr_in old, new;
> > +       const int zero = 0;
> > +
> > +       bpf_probe_read_user(&old, sizeof(old), ptr);
> > +       bpf_map_update_elem(&results_map, &zero, &old, 0);
> 
> could have used global data and read directly into it :)

Hehe, yeah sure, though that we have covered separately. :-) Wasn't planning to
bug Ilya once again to recompile everything on his s390x box.

> > +       __builtin_memset(&new, 0xab, sizeof(new));
> > +       bpf_probe_write_user(ptr, &new, sizeof(new));
> > +
> > +       return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
> > --
> > 2.21.0
> >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function
  2019-10-25 22:15     ` Daniel Borkmann
@ 2019-10-25 22:43       ` Andrii Nakryiko
  0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 22:43 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking

On Fri, Oct 25, 2019 at 3:15 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On Fri, Oct 25, 2019 at 02:53:07PM -0700, Andrii Nakryiko wrote:
> > On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > >
> > > Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions")
> > > missed to add probe write function, therefore factor out a probe_write_common()
> > > helper with most logic of probe_kernel_write() except setting KERNEL_DS, and
> > > add a new probe_user_write() helper so it can be used from BPF side.
> > >
> > > Again, on some archs, the user address space and kernel address space can
> > > co-exist and be overlapping, so in such case, setting KERNEL_DS would mean
> > > that the given address is treated as being in kernel address space.
> > >
> > > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> >
> > LGTM. See an EFAULT comment below, though.
> >
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
> >
> > [...]
> >
> > > +/**
> > > + * probe_user_write(): safely attempt to write to a user-space location
> > > + * @dst: address to write to
> > > + * @src: pointer to the data that shall be written
> > > + * @size: size of the data chunk
> > > + *
> > > + * Safely write to address @dst from the buffer at @src.  If a kernel fault
> > > + * happens, handle that and return -EFAULT.
> > > + */
> > > +
> > > +long __weak probe_user_write(void __user *dst, const void *src, size_t size)
> > > +    __attribute__((alias("__probe_user_write")));
> >
> > curious, why is there this dance of probe_user_write alias to
> > __probe_user_write (and for other pairs of functions as well)?
>
> Seems done by convention to allow archs to override the __weak marked
> functions in order to add additional checks and being able to then call
> into the __ prefixed variant.
>
> > > +long __probe_user_write(void __user *dst, const void *src, size_t size)
> > > +{
> > > +       long ret = -EFAULT;
> >
> > This initialization is not necessary, is it? Similarly in
> > __probe_user_read higher in this file.
>
> Not entirely sure what you mean. In both there's access_ok() check before
> invoking the common helper.

ah, right, if, yeah, never mind then.

>
> > > +       mm_segment_t old_fs = get_fs();
> > > +
> > > +       set_fs(USER_DS);
> > > +       if (access_ok(dst, size))
> > > +               ret = probe_write_common(dst, src, size);
> > > +       set_fs(old_fs);
> > > +
> > > +       return ret;
> > > +}
> > > +EXPORT_SYMBOL_GPL(probe_user_write);
> > >
> > >  /**
> > >   * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
> > > --
> > > 2.21.0
> > >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space
  2019-10-25 22:38     ` Daniel Borkmann
@ 2019-10-25 23:35       ` Andrii Nakryiko
  0 siblings, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 23:35 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking, Ilya Leoshkevich

On Fri, Oct 25, 2019 at 4:09 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On Fri, Oct 25, 2019 at 03:14:49PM -0700, Andrii Nakryiko wrote:
> > On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > >
> > > Tested on x86-64 and Ilya was also kind enough to give it a spin on
> > > s390x, both passing with probe_user:OK there. The test is using the
> > > newly added bpf_probe_read_user() to dump sockaddr from connect call
> > > into BPF map and overrides the user buffer via bpf_probe_write_user():
> > >
> > >   # ./test_progs
> > >   [...]
> > >   #17 pkt_md_access:OK
> > >   #18 probe_user:OK
> > >   #19 prog_run_xattr:OK
> > >   [...]
> > >
> > > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> > > Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > > ---
> > >  .../selftests/bpf/prog_tests/probe_user.c     | 80 +++++++++++++++++++
> > >  .../selftests/bpf/progs/test_probe_user.c     | 33 ++++++++
> > >  2 files changed, 113 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> > > new file mode 100644
> > > index 000000000000..e37761bda8a4
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
> > > @@ -0,0 +1,80 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include <test_progs.h>
> > > +
> > > +void test_probe_user(void)
> > > +{
> > > +#define kprobe_name "__sys_connect"
> > > +       const char *prog_name = "kprobe/" kprobe_name;
> > > +       const char *obj_file = "./test_probe_user.o";
> > > +       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
> > > +               .relaxed_maps = true,
> >
> > do we need relaxed_maps in this case?
>
> Ah yeap, I'll remove. Test runs fine w/o it. Any particular reason you added it back in
> 928ca75e59d7 ("selftests/bpf: switch tests to new bpf_object__open_{file, mem}() APIs")?

Hmm, I'm not sure about those tests... probably just copy/pasted
something mechanically. We need .relaxed_maps for tests that add numa
fields, otherwise libbpf will reject them. I shouldn't have added
them.

>
> > > +       );
> > > +       int err, results_map_fd, sock_fd, duration;
> > > +       struct sockaddr curr, orig, tmp;
> > > +       struct sockaddr_in *in = (struct sockaddr_in *)&curr;
> > > +       struct bpf_link *kprobe_link = NULL;
> > > +       struct bpf_program *kprobe_prog;
> > > +       struct bpf_object *obj;
> > > +       static const int zero = 0;
> > > +
> >
> > [...]
> >
> > > +
> > > +struct {
> > > +       __uint(type, BPF_MAP_TYPE_ARRAY);
> > > +       __uint(max_entries, 1);
> > > +       __type(key, int);
> > > +       __type(value, struct sockaddr_in);
> > > +} results_map SEC(".maps");
> > > +
> > > +SEC("kprobe/__sys_connect")
> > > +int handle_sys_connect(struct pt_regs *ctx)
> > > +{
> > > +       void *ptr = (void *)PT_REGS_PARM2(ctx);
> > > +       struct sockaddr_in old, new;
> > > +       const int zero = 0;
> > > +
> > > +       bpf_probe_read_user(&old, sizeof(old), ptr);
> > > +       bpf_map_update_elem(&results_map, &zero, &old, 0);
> >
> > could have used global data and read directly into it :)
>
> Hehe, yeah sure, though that we have covered separately. :-) Wasn't planning to
> bug Ilya once again to recompile everything on his s390x box.

Oh, it's not to test global data, it's because global data make BPF
side of tests much cleaner. But it's minor, feel free to ignore. Once
we have a good interface to global data from user-space, though, there
will be a bigger motivation to switch them to global data, because
both BPF and user side will be much more succinct.

>
> > > +       __builtin_memset(&new, 0xab, sizeof(new));
> > > +       bpf_probe_write_user(ptr, &new, sizeof(new));
> > > +
> > > +       return 0;
> > > +}
> > > +
> > > +char _license[] SEC("license") = "GPL";
> > > --
> > > 2.21.0
> > >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space
  2019-10-25 16:37 ` [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space Daniel Borkmann
  2019-10-25 22:14   ` Andrii Nakryiko
@ 2019-10-25 23:36   ` Andrii Nakryiko
  1 sibling, 0 replies; 17+ messages in thread
From: Andrii Nakryiko @ 2019-10-25 23:36 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Alexei Starovoitov, bpf, Networking, Ilya Leoshkevich

On Fri, Oct 25, 2019 at 1:44 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Tested on x86-64 and Ilya was also kind enough to give it a spin on
> s390x, both passing with probe_user:OK there. The test is using the
> newly added bpf_probe_read_user() to dump sockaddr from connect call
> into BPF map and overrides the user buffer via bpf_probe_write_user():
>
>   # ./test_progs
>   [...]
>   #17 pkt_md_access:OK
>   #18 probe_user:OK
>   #19 prog_run_xattr:OK
>   [...]
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  .../selftests/bpf/prog_tests/probe_user.c     | 80 +++++++++++++++++++
>  .../selftests/bpf/progs/test_probe_user.c     | 33 ++++++++
>  2 files changed, 113 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_user.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_probe_user.c

[...]

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-10-25 23:36 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-25 16:37 [PATCH bpf-next 0/5] Fix BPF probe memory helpers Daniel Borkmann
2019-10-25 16:37 ` [PATCH bpf-next 1/5] uaccess: Add non-pagefault user-space write function Daniel Borkmann
2019-10-25 21:53   ` Andrii Nakryiko
2019-10-25 22:15     ` Daniel Borkmann
2019-10-25 22:43       ` Andrii Nakryiko
2019-10-25 16:37 ` [PATCH bpf-next 2/5] bpf: Make use of probe_user_write in probe write helper Daniel Borkmann
2019-10-25 21:59   ` Andrii Nakryiko
2019-10-25 16:37 ` [PATCH bpf-next 3/5] bpf: Add probe_read_{user,kernel} and probe_read_str_{user,kernel} helpers Daniel Borkmann
2019-10-25 22:08   ` Andrii Nakryiko
2019-10-25 22:20     ` Daniel Borkmann
2019-10-25 16:37 ` [PATCH bpf-next 4/5] bpf, samples: Use bpf_probe_read_user where appropriate Daniel Borkmann
2019-10-25 22:08   ` Andrii Nakryiko
2019-10-25 16:37 ` [PATCH bpf-next 5/5] bpf, testing: Add selftest to read/write sockaddr from user space Daniel Borkmann
2019-10-25 22:14   ` Andrii Nakryiko
2019-10-25 22:38     ` Daniel Borkmann
2019-10-25 23:35       ` Andrii Nakryiko
2019-10-25 23:36   ` Andrii Nakryiko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).