bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
To: Kenny Yu <kennyyu@fb.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Andrii Nakryiko <andrii@kernel.org>,
	Alexei Starovoitov <ast@kernel.org>, bpf <bpf@vger.kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Gabriele <phoenix1987@gmail.com>, Yonghong Song <yhs@fb.com>
Subject: Re: [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper
Date: Thu, 20 Jan 2022 14:45:49 -0800	[thread overview]
Message-ID: <CAEf4BzbEqSh36mFsrwtMYD6c-=LcJ3XbJsEa1ZatLdWkB+3mtQ@mail.gmail.com> (raw)
In-Reply-To: <20220120172942.246805-2-kennyyu@fb.com>

On Thu, Jan 20, 2022 at 9:30 AM Kenny Yu <kennyyu@fb.com> wrote:
>
> This adds a helper for bpf programs to access the memory of other
> tasks. This also adds the ability for bpf iterator programs to
> be sleepable.
>
> This changes `bpf_iter_run_prog` to use the appropriate synchronization for
> sleepable bpf programs. With sleepable bpf iterator programs, we can no
> longer use `rcu_read_lock()` and must use `rcu_read_lock_trace()` instead
> to protect the bpf program.
>
> As an example use case at Meta, we are using a bpf task iterator program
> and this new helper to print C++ async stack traces for all threads of
> a given process.
>
> Signed-off-by: Kenny Yu <kennyyu@fb.com>
> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       | 11 +++++++++++
>  kernel/bpf/bpf_iter.c          | 20 +++++++++++++++-----
>  kernel/bpf/helpers.c           | 23 +++++++++++++++++++++++
>  kernel/trace/bpf_trace.c       |  2 ++
>  tools/include/uapi/linux/bpf.h | 11 +++++++++++
>  6 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index dce54eb0aae8..29f174c08126 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2220,6 +2220,7 @@ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
>  extern const struct bpf_func_proto bpf_find_vma_proto;
>  extern const struct bpf_func_proto bpf_loop_proto;
>  extern const struct bpf_func_proto bpf_strncmp_proto;
> +extern const struct bpf_func_proto bpf_access_process_vm_proto;
>
>  const struct bpf_func_proto *tracing_prog_func_proto(
>    enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index fe2272defcd9..2ac56e2512df 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,16 @@ union bpf_attr {
>   *             This helper is currently supported by cgroup programs only.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> + *     Description
> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> + *             address space, and stores the data in *dst*. *flags* is not
> + *             used yet and is provided for future extensibility. This is a
> + *             wrapper of **access_process_vm**\ ().
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5239,6 +5249,7 @@ union bpf_attr {
>         FN(get_func_arg_cnt),           \
>         FN(get_retval),                 \
>         FN(set_retval),                 \
> +       FN(access_process_vm),          \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index b7aef5b3416d..110029ede71e 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -5,6 +5,7 @@
>  #include <linux/anon_inodes.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
> +#include <linux/rcupdate_trace.h>
>
>  struct bpf_iter_target_info {
>         struct list_head list;
> @@ -684,11 +685,20 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
>  {
>         int ret;
>
> -       rcu_read_lock();
> -       migrate_disable();
> -       ret = bpf_prog_run(prog, ctx);
> -       migrate_enable();
> -       rcu_read_unlock();
> +       if (prog->aux->sleepable) {
> +               rcu_read_lock_trace();
> +               migrate_disable();
> +               might_fault();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock_trace();
> +       } else {
> +               rcu_read_lock();
> +               migrate_disable();
> +               ret = bpf_prog_run(prog, ctx);
> +               migrate_enable();
> +               rcu_read_unlock();
> +       }
>
>         /* bpf program can only return 0 or 1:
>          *  0 : okay
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 01cfdf40c838..9d7e86edc48e 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -16,6 +16,7 @@
>  #include <linux/pid_namespace.h>
>  #include <linux/proc_ns.h>
>  #include <linux/security.h>
> +#include <linux/btf_ids.h>
>
>  #include "../../lib/kstrtox.h"
>
> @@ -671,6 +672,28 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
>         .arg3_type      = ARG_ANYTHING,
>  };
>
> +BPF_CALL_5(bpf_access_process_vm, void *, dst, u32, size,
> +          const void __user *, user_ptr, struct task_struct *, tsk,
> +          u64, flags)
> +{
> +       /* flags is not used yet */
> +       if (flags)
> +               return -EINVAL;
> +       return access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
> +}
> +
> +const struct bpf_func_proto bpf_access_process_vm_proto = {
> +       .func           = bpf_access_process_vm,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
> +       .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
> +       .arg3_type      = ARG_ANYTHING,
> +       .arg4_type      = ARG_PTR_TO_BTF_ID,
> +       .arg4_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
> +       .arg5_type      = ARG_ANYTHING
> +};
> +
>  BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
>  {
>         if (cpu >= nr_cpu_ids)
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 21aa30644219..1a6a81ce2e36 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1257,6 +1257,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_find_vma_proto;
>         case BPF_FUNC_trace_vprintk:
>                 return bpf_get_trace_vprintk_proto();
> +       case BPF_FUNC_access_process_vm:
> +               return prog->aux->sleepable ? &bpf_access_process_vm_proto : NULL;
>         default:
>                 return bpf_base_func_proto(func_id);
>         }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index fe2272defcd9..2ac56e2512df 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5049,6 +5049,16 @@ union bpf_attr {
>   *             This helper is currently supported by cgroup programs only.
>   *     Return
>   *             0 on success, or a negative error in case of failure.
> + *
> + * long bpf_access_process_vm(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
> + *     Description
> + *             Read *size* bytes from user space address *user_ptr* in *tsk*'s
> + *             address space, and stores the data in *dst*. *flags* is not
> + *             used yet and is provided for future extensibility. This is a
> + *             wrapper of **access_process_vm**\ ().
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.

Wait, can it read fewer than *size* bytes and still return success?

bpf_probe_read_kernel() returns:

0 on success, or a negative error in case of failure.

Let's be consistent. Returning the number of bytes read makes more
sense when we don't know ahead of time how many bytes will actually
be read (e.g., when reading zero-terminated strings).

BTW, should we also add a C string reading helper, just like
there is bpf_probe_read_user_str() alongside bpf_probe_read_user()?

Another thing: I think it's important to mention in the helper
description that it can be used only from sleepable BPF programs.

And not to start a bikeshedding session, but we already have
bpf_copy_from_user(); wouldn't something like
bpf_copy_from_user_{vm,process,remote}() be more in line with it and
less surprising for BPF users? BTW, "access" implies writing just as
much as reading, so using "access" in the sense of "read" seems wrong
and confusing.


>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5239,6 +5249,7 @@ union bpf_attr {
>         FN(get_func_arg_cnt),           \
>         FN(get_retval),                 \
>         FN(set_retval),                 \
> +       FN(access_process_vm),          \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>

  reply	other threads:[~2022-01-20 22:46 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-13 23:31 [PATCH bpf-next 0/3] Add bpf_access_process_vm helper and sleepable bpf iterator programs Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-13 23:31 ` [PATCH bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-14  0:48 ` [PATCH v2 bpf-next 0/4] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 2/4] bpf: Add support for sleepable programs in bpf_iter_run_prog Kenny Yu
2022-01-14  2:50     ` Alexei Starovoitov
2022-01-14 23:15       ` Kenny Yu
2022-01-14  0:48   ` [PATCH v2 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-14  0:49   ` [PATCH v2 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
2022-01-14  2:39     ` Alexei Starovoitov
2022-01-14 23:14       ` Kenny Yu
2022-01-15  1:38         ` Andrii Nakryiko
2022-01-15  4:30           ` Kenny Yu
2022-01-15 16:27           ` Gabriele
2022-01-19 16:56             ` Kenny Yu
2022-01-19 18:02 ` [PATCH v3 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-19 20:16     ` Alexei Starovoitov
2022-01-19 22:51       ` Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-19 18:02   ` [PATCH v3 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-19 22:59 ` [PATCH v4 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-20  3:45     ` Yonghong Song
2022-01-20 17:11       ` Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-19 22:59   ` [PATCH v4 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-20 17:29 ` [PATCH v5 bpf-next 0/3] Add bpf_access_process_vm helper and " Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 1/3] bpf: Add bpf_access_process_vm() helper Kenny Yu
2022-01-20 22:45     ` Andrii Nakryiko [this message]
2022-01-21  2:27       ` Alexei Starovoitov
2022-01-21 17:20         ` Andrii Nakryiko
2022-01-21 17:41           ` Kenny Yu
2022-01-21 17:47             ` Alexei Starovoitov
2022-01-21 18:30               ` Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-20 17:29   ` [PATCH v5 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-20 22:48     ` Andrii Nakryiko
2022-01-21 19:30 ` [PATCH v6 bpf-next 0/3] Add bpf_copy_from_user_task helper and " Kenny Yu
2022-01-21 19:30   ` [PATCH v6 bpf-next 1/3] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
2022-01-21 19:53     ` Andrii Nakryiko
2022-01-21 20:04       ` Yonghong Song
2022-01-21 20:07         ` Andrii Nakryiko
2022-01-21 21:15           ` Yonghong Song
2022-01-24 17:30             ` Kenny Yu
2022-01-22  9:58       ` Gabriele
2022-01-22 10:08         ` Gabriele
2022-01-21 19:30   ` [PATCH v6 bpf-next 2/3] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-21 19:54     ` Andrii Nakryiko
2022-01-21 19:30   ` [PATCH v6 bpf-next 3/3] selftests/bpf: Add test " Kenny Yu
2022-01-21 19:55     ` Andrii Nakryiko
2022-01-24 18:53 ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " Kenny Yu
2022-01-24 18:54   ` [PATCH v7 bpf-next 1/4] bpf: Add support for bpf iterator programs to use sleepable helpers Kenny Yu
2022-01-24 22:19     ` Andrii Nakryiko
2022-01-24 18:54   ` [PATCH v7 bpf-next 2/4] bpf: Add bpf_copy_from_user_task() helper Kenny Yu
2022-01-24 22:18     ` Andrii Nakryiko
2022-01-25  4:06       ` Alexei Starovoitov
2022-01-24 18:54   ` [PATCH v7 bpf-next 3/4] libbpf: Add "iter.s" section for sleepable bpf iterator programs Kenny Yu
2022-01-24 18:54   ` [PATCH v7 bpf-next 4/4] selftests/bpf: Add test " Kenny Yu
2022-01-25  4:10   ` [PATCH v7 bpf-next 0/4] Add bpf_copy_from_user_task helper and " patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAEf4BzbEqSh36mFsrwtMYD6c-=LcJ3XbJsEa1ZatLdWkB+3mtQ@mail.gmail.com' \
    --to=andrii.nakryiko@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kennyyu@fb.com \
    --cc=phoenix1987@gmail.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).