bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Proposal: bpf_copy_from_user_remote
@ 2022-01-12 11:03 Gabriele
  2022-01-13 23:37 ` Kenny Yu
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriele @ 2022-01-12 11:03 UTC (permalink / raw)
  To: bpf; +Cc: ast, daniel

Hi there

As I have mentioned in another thread
(https://lore.kernel.org/bpf/CAGnuNNt7va4u78rvPmusYnhXAuy5e9aRhEeO6HDqYUsH979QLQ@mail.gmail.com/T/)
I started looking into adding a BPF wrapper around process_vm_readv to
allow BPF programs to read the userspace VM of a remote process.

Given my unfamiliarity with the code, I hope you would indulge me in
asking for feedback on my current approach to see whether it makes
sense or not. I have tried to model my change on top of the patch that
introduced bpf_copy_from_user
(https://lwn.net/ml/netdev/20200630043343.53195-4-alexei.starovoitov@gmail.com/)
and this is what I have got so far.

Cheers,
Gab

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e7a163a3146b..05060a709609 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2150,6 +2150,7 @@ extern const struct bpf_func_proto
bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_remote_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_snprintf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..8af855d62a5f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3391,5 +3391,8 @@ static inline int seal_check_future_write(int
seals, struct vm_area_struct *vma)
  return 0;
 }

+extern ssize_t process_vm_read(pid_t pid, void * dst, ssize_t size,
+ const void __user * user_ptr, unsigned long flags);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba5af15e25f5..436c703f3a13 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4938,6 +4938,14 @@ union bpf_attr {
  * **-ENOENT** if symbol is not found.
  *
  * **-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_copy_from_user_remote(void *dst, u32 size, const void
*user_ptr, pid_t pid)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* of the prodess
+ * *pid* and store the data in *dst*. This is essentially a wrapper of
+ * **process_vm_readv**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN) \
  FN(unspec), \
@@ -5120,6 +5128,7 @@ union bpf_attr {
  FN(trace_vprintk), \
  FN(skc_to_unix_sock), \
  FN(kallsyms_lookup_name), \
+ FN(copy_from_user_remote), \
  /* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 649f07623df6..0343870a8c03 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -15,6 +15,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/mm.h>

 #include "../../lib/kstrtox.h"

@@ -656,6 +657,37 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
  .arg3_type = ARG_ANYTHING,
 };

+BPF_CALL_4(bpf_copy_from_user_remote, void *, dst, u32, size,
+    const void __user *, user_ptr, pid_t, pid)
+{
+ int ret;
+ struct iovec local, remote;
+
+ local.iov_base = dst;
+ remote.iov_base = (void *) user_ptr;
+
+ local.iov_len = remote.iov_len = size;
+
+ ret = process_vm_read(pid, dst, size, user_ptr, 0);
+
+ if (unlikely(ret)) {
+ memset(dst, 0, size);
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+const struct bpf_func_proto bpf_copy_from_user_remote_proto = {
+ .func = bpf_copy_from_user_remote,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
  if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ae9755037b7e..b9f27b0b62f9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1208,6 +1208,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
  return &bpf_get_branch_snapshot_proto;
  case BPF_FUNC_trace_vprintk:
  return bpf_get_trace_vprintk_proto();
+ case BPF_FUNC_copy_from_user_remote:
+ return prog->aux->sleepable ? &bpf_copy_from_user_remote_proto : NULL;
  default:
  return bpf_base_func_proto(func_id);
  }
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 4bcc11958089..90097267b567 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -302,3 +302,12 @@ SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
 {
  return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1);
 }
+
+ssize_t process_vm_read(pid_t pid, void * dst, ssize_t size,
+ const void __user * user_ptr, unsigned long flags)
+{
+    struct iovec lvec = {.iov_base = dst, .iov_len = size};
+    struct iovec rvec = {.iov_base = (void *) user_ptr, .iov_len = size};
+
+ return process_vm_rw(pid, &lvec, 1, &rvec, 1, flags, 0);
+}
\ No newline at end of file

-- 
"Egli è scritto in lingua matematica, e i caratteri son triangoli,
cerchi, ed altre figure
geometriche, senza i quali mezzi è impossibile a intenderne umanamente parola;
senza questi è un aggirarsi vanamente per un oscuro laberinto."

-- G. Galilei, Il saggiatore.

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: Proposal: bpf_copy_from_user_remote
  2022-01-12 11:03 Proposal: bpf_copy_from_user_remote Gabriele
@ 2022-01-13 23:37 ` Kenny Yu
  2022-01-14 10:00   ` Gabriele
  0 siblings, 1 reply; 5+ messages in thread
From: Kenny Yu @ 2022-01-13 23:37 UTC (permalink / raw)
  To: phoenix1987; +Cc: ast, bpf, daniel, yhs, Kenny Yu

Hi Gabriele,

I just submitted a patch series that adds a similar helper to read
userspace memory from a remote process, please see: https://lore.kernel.org/bpf/20220113233158.1582743-1-kennyyu@fb.com/T/#ma0646f96bccf0b957793054de7404115d321079d

In my patch series, I added a bpf helper to wrap `access_process_vm`
which takes a `struct task_struct` argument instead of a pid.

In your patch series, one issue would be it is not clear which pid namespace
the pid belongs to, whereas passing a `struct task_struct` is unambiguous.
I think the helper signature in my patch series also provides more flexibility,
as the bpf program can also provide different flags on how to read
userspace memory.

Our use case at Meta for this change is to use a bpf task iterator program
to read debug information from a running process in production, e.g.,
extract C++ async stack traces from a running program.

A few questions:
* What is your use case for adding this helper?
* Do you have a specific requirement that requires using a pid, or would a
  helper using `struct task_struct` be sufficient?
* Are you ok with these changes? If so, I will proceed with my patch series.

Thanks,
Kenny Yu

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Proposal: bpf_copy_from_user_remote
  2022-01-13 23:37 ` Kenny Yu
@ 2022-01-14 10:00   ` Gabriele
  2022-01-14 16:13     ` Gabriele
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriele @ 2022-01-14 10:00 UTC (permalink / raw)
  To: Kenny Yu; +Cc: ast, bpf, daniel, yhs

Hi Kenny

Your patch series looks neat! I haven't come across access_process_vm
during my source exploration. Indeed, passing a process descriptor
seems like a good idea. I presume one would then use, e.g.,
find_task_by_pid_ns to convert a pid+ns to a struct task_struct.

My use cases are about general observability into runtimes like
Python. For profiling, I would like to make a BPF version of Austin.
There is a variant for Linux that can be used to collect native
(including kernel) stacks (see
https://github.com/P403n1x87/austin#native-frame-stack), but this
works in a "traditional" way, using ptrace via libunwind. My idea is
to implement Python stack unwinding as a BPF program so that native
and runtime stacks could be both collected and then interleaved, like
austinp currently does. I think that, perhaps, I'd need a sleepable
version of the perf_event section to achieve this.

I have debugging use cases for Python in mind too, in particular for
native extensions. I believe these are similar to your C++ use cases.
I don't expect to be needing to iterate over all running tasks, so as
long as the new helper can be used against specific processes that can
be identified via pid (and namespace) then I'm totally fine with your
patch series.

Cheers,
Gab

On Thu, 13 Jan 2022 at 23:37, Kenny Yu <kennyyu@fb.com> wrote:
>
> Hi Gabriele,
>
> I just submitted a patch series that adds a similar helper to read
> userspace memory from a remote process, please see: https://lore.kernel.org/bpf/20220113233158.1582743-1-kennyyu@fb.com/T/#ma0646f96bccf0b957793054de7404115d321079d
>
> In my patch series, I added a bpf helper to wrap `access_process_vm`
> which takes a `struct task_struct` argument instead of a pid.
>
> In your patch series, one issue would be it is not clear which pid namespace
> the pid belongs to, whereas passing a `struct task_struct` is unambiguous.
> I think the helper signature in my patch series also provides more flexibility,
> as the bpf program can also provide different flags on how to read
> userspace memory.
>
> Our use case at Meta for this change is to use a bpf task iterator program
> to read debug information from a running process in production, e.g.,
> extract C++ async stack traces from a running program.
>
> A few questions:
> * What is your use case for adding this helper?
> * Do you have a specific requirement that requires using a pid, or would a
>   helper using `struct task_struct` be sufficient?
> * Are you ok with these changes? If so, I will proceed with my patch series.
>
> Thanks,
> Kenny Yu



-- 
"Egli è scritto in lingua matematica, e i caratteri son triangoli,
cerchi, ed altre figure
geometriche, senza i quali mezzi è impossibile a intenderne umanamente parola;
senza questi è un aggirarsi vanamente per un oscuro laberinto."

-- G. Galilei, Il saggiatore.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Proposal: bpf_copy_from_user_remote
  2022-01-14 10:00   ` Gabriele
@ 2022-01-14 16:13     ` Gabriele
  2022-01-14 22:24       ` Kenny Yu
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriele @ 2022-01-14 16:13 UTC (permalink / raw)
  To: Kenny Yu; +Cc: ast, bpf, daniel, yhs

I have to correct myself. Looking at my notes, I actually came across
access_process_vm. However this line in the comment threw me off and I
stopped looking deeper into it

* Source/target buffer must be kernel space,

I should've read that and the function signature more carefully. I
thought this meant both source and target were supposed to be kernel
space. I switched to access_process_vm (but kept my signature) and got
something that works as intended for my use.

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e7a163a3146b..05060a709609 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2150,6 +2150,7 @@ extern const struct bpf_func_proto
bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
+extern const struct bpf_func_proto bpf_copy_from_user_remote_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_snprintf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba5af15e25f5..436c703f3a13 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4938,6 +4938,14 @@ union bpf_attr {
  * **-ENOENT** if symbol is not found.
  *
  * **-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_copy_from_user_remote(void *dst, u32 size, const void
*user_ptr, pid_t pid)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* of the prodess
+ * *pid* and store the data in *dst*. This is essentially a wrapper of
+ * **process_vm_readv**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN) \
  FN(unspec), \
@@ -5120,6 +5128,7 @@ union bpf_attr {
  FN(trace_vprintk), \
  FN(skc_to_unix_sock), \
  FN(kallsyms_lookup_name), \
+ FN(copy_from_user_remote), \
  /* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 649f07623df6..07e9540c6e3a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -15,6 +15,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
 #include <linux/security.h>
+#include <linux/mm.h>

 #include "../../lib/kstrtox.h"

@@ -656,6 +657,31 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
  .arg3_type = ARG_ANYTHING,
 };

+BPF_CALL_4(bpf_copy_from_user_remote, void *, dst, u32, size,
+    const void __user *, user_ptr, pid_t, pid)
+{
+ struct task_struct * task;
+
+ if (unlikely(size == 0))
+ return 0;
+
+ task = find_get_task_by_vpid(pid);
+ if (!task)
+ return -ESRCH;
+
+ return access_process_vm(task, (unsigned long) user_ptr, dst, size, 0);
+}
+
+const struct bpf_func_proto bpf_copy_from_user_remote_proto = {
+ .func = bpf_copy_from_user_remote,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
 {
  if (cpu >= nr_cpu_ids)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ae9755037b7e..b9f27b0b62f9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1208,6 +1208,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
  return &bpf_get_branch_snapshot_proto;
  case BPF_FUNC_trace_vprintk:
  return bpf_get_trace_vprintk_proto();
+ case BPF_FUNC_copy_from_user_remote:
+ return prog->aux->sleepable ? &bpf_copy_from_user_remote_proto : NULL;
  default:
  return bpf_base_func_proto(func_id);
  }
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index a6403ddf5de7..bd092f1692e2 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -614,6 +614,7 @@ class PrinterHelpers(Printer):
             'const struct sk_buff': 'const struct __sk_buff',
             'struct sk_msg_buff': 'struct sk_msg_md',
             'struct xdp_buff': 'struct xdp_md',
+            "pid_t": "int",
     }
     # Helpers overloaded for different context types.
     overloaded_helpers = [
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ba5af15e25f5..436c703f3a13 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4938,6 +4938,14 @@ union bpf_attr {
  * **-ENOENT** if symbol is not found.
  *
  * **-EPERM** if caller does not have permission to obtain kernel address.
+ *
+ * long bpf_copy_from_user_remote(void *dst, u32 size, const void
*user_ptr, pid_t pid)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* of the prodess
+ * *pid* and store the data in *dst*. This is essentially a wrapper of
+ * **process_vm_readv**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN) \
  FN(unspec), \
@@ -5120,6 +5128,7 @@ union bpf_attr {
  FN(trace_vprintk), \
  FN(skc_to_unix_sock), \
  FN(kallsyms_lookup_name), \
+ FN(copy_from_user_remote), \
  /* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper

On Fri, 14 Jan 2022 at 10:00, Gabriele <phoenix1987@gmail.com> wrote:
>
> Hi Kenny
>
> Your patch series looks neat! I haven't come across access_process_vm
> during my source exploration. Indeed, passing a process descriptor
> seems like a good idea. I presume one would then use, e.g.,
> find_task_by_pid_ns to convert a pid+ns to a struct task_struct.
>
> My use cases are about general observability into runtimes like
> Python. For profiling, I would like to make a BPF version of Austin.
> There is a variant for Linux that can be used to collect native
> (including kernel) stacks (see
> https://github.com/P403n1x87/austin#native-frame-stack), but this
> works in a "traditional" way, using ptrace via libunwind. My idea is
> to implement Python stack unwinding as a BPF program so that native
> and runtime stacks could be both collected and then interleaved, like
> austinp currently does. I think that, perhaps, I'd need a sleepable
> version of the perf_event section to achieve this.
>
> I have debugging use cases for Python in mind too, in particular for
> native extensions. I believe these are similar to your C++ use cases.
> I don't expect to be needing to iterate over all running tasks, so as
> long as the new helper can be used against specific processes that can
> be identified via pid (and namespace) then I'm totally fine with your
> patch series.
>
> Cheers,
> Gab
>
> On Thu, 13 Jan 2022 at 23:37, Kenny Yu <kennyyu@fb.com> wrote:
> >
> > Hi Gabriele,
> >
> > I just submitted a patch series that adds a similar helper to read
> > userspace memory from a remote process, please see: https://lore.kernel.org/bpf/20220113233158.1582743-1-kennyyu@fb.com/T/#ma0646f96bccf0b957793054de7404115d321079d
> >
> > In my patch series, I added a bpf helper to wrap `access_process_vm`
> > which takes a `struct task_struct` argument instead of a pid.
> >
> > In your patch series, one issue would be it is not clear which pid namespace
> > the pid belongs to, whereas passing a `struct task_struct` is unambiguous.
> > I think the helper signature in my patch series also provides more flexibility,
> > as the bpf program can also provide different flags on how to read
> > userspace memory.
> >
> > Our use case at Meta for this change is to use a bpf task iterator program
> > to read debug information from a running process in production, e.g.,
> > extract C++ async stack traces from a running program.
> >
> > A few questions:
> > * What is your use case for adding this helper?
> > * Do you have a specific requirement that requires using a pid, or would a
> >   helper using `struct task_struct` be sufficient?
> > * Are you ok with these changes? If so, I will proceed with my patch series.
> >
> > Thanks,
> > Kenny Yu
>
>
>
> --
> "Egli è scritto in lingua matematica, e i caratteri son triangoli,
> cerchi, ed altre figure
> geometriche, senza i quali mezzi è impossibile a intenderne umanamente parola;
> senza questi è un aggirarsi vanamente per un oscuro laberinto."
>
> -- G. Galilei, Il saggiatore.



-- 
"Egli è scritto in lingua matematica, e i caratteri son triangoli,
cerchi, ed altre figure
geometriche, senza i quali mezzi è impossibile a intenderne umanamente parola;
senza questi è un aggirarsi vanamente per un oscuro laberinto."

-- G. Galilei, Il saggiatore.

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: Proposal: bpf_copy_from_user_remote
  2022-01-14 16:13     ` Gabriele
@ 2022-01-14 22:24       ` Kenny Yu
  0 siblings, 0 replies; 5+ messages in thread
From: Kenny Yu @ 2022-01-14 22:24 UTC (permalink / raw)
  To: phoenix1987; +Cc: ast, bpf, daniel, kennyyu, yhs

Hi Gabriele,

Thanks for the context about your use case!

> I don't expect to be needing to iterate over all running tasks, so as
long as the new helper can be used against specific processes that can
be identified via pid (and namespace) then I'm totally fine with your
patch series.

Sounds great! I'll proceed with my patch series based on `access_process_vm`.

> I switched to access_process_vm (but kept my signature) and got
something that works as intended for my use.

Awesome!

Thanks,
Kenny

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-01-14 22:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-12 11:03 Proposal: bpf_copy_from_user_remote Gabriele
2022-01-13 23:37 ` Kenny Yu
2022-01-14 10:00   ` Gabriele
2022-01-14 16:13     ` Gabriele
2022-01-14 22:24       ` Kenny Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).