[PATCH bpf-next v3 0/2] libbpf: Add syscall-specific variant of BPF

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH bpf-next v3 0/2] libbpf: Add syscall-specific variant of BPF_KPROBE
@ 2022-02-07 14:31 Hengqi Chen
  2022-02-07 14:31 ` [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro Hengqi Chen
  2022-02-07 14:31 ` [PATCH bpf-next v3 2/2] selftests/bpf: Test " Hengqi Chen
  0 siblings, 2 replies; 8+ messages in thread
From: Hengqi Chen @ 2022-02-07 14:31 UTC (permalink / raw)
  To: bpf; +Cc: andrii, hengqi.chen

Add new macro BPF_KPROBE_SYSCALL, which provides easy access to syscall
input arguments. See [0] and [1] for background.

  [0]: https://github.com/libbpf/libbpf-bootstrap/issues/57
  [1]: https://github.com/libbpf/libbpf/issues/425

v2->v3:
  - Use PT_REGS_SYSCALL_REGS
  - Move selftest to progs/bpf_syscall_macro.c

v1->v2:
  - Use PT_REGS_PARM2_CORE_SYSCALL instead

Hengqi Chen (2):
  libbpf: Add BPF_KPROBE_SYSCALL macro
  selftests/bpf: Test BPF_KPROBE_SYSCALL macro

 tools/lib/bpf/bpf_tracing.h                   | 33 +++++++++++++++++++
 .../bpf/prog_tests/test_bpf_syscall_macro.c   |  6 ++++
 .../selftests/bpf/progs/bpf_syscall_macro.c   | 23 +++++++++++++
 3 files changed, 62 insertions(+)

--
2.30.2

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-07 14:31 [PATCH bpf-next v3 0/2] libbpf: Add syscall-specific variant of BPF_KPROBE Hengqi Chen
@ 2022-02-07 14:31 ` Hengqi Chen
  2022-02-07 21:58   ` Andrii Nakryiko
  2022-02-07 14:31 ` [PATCH bpf-next v3 2/2] selftests/bpf: Test " Hengqi Chen
  1 sibling, 1 reply; 8+ messages in thread
From: Hengqi Chen @ 2022-02-07 14:31 UTC (permalink / raw)
  To: bpf; +Cc: andrii, hengqi.chen

Add syscall-specific variant of BPF_KPROBE named BPF_KPROBE_SYSCALL ([0]).
The new macro hides the underlying way of getting syscall input arguments.
With the new macro, the following code:

    SEC("kprobe/__x64_sys_close")
    int BPF_KPROBE(do_sys_close, struct pt_regs *regs)
    {
        int fd;

        fd = PT_REGS_PARM1_CORE(regs);
        /* do something with fd */
    }

can be written as:

    SEC("kprobe/__x64_sys_close")
    int BPF_KPROBE_SYSCALL(do_sys_close, int fd)
    {
        /* do something with fd */
    }

  [0] Closes: https://github.com/libbpf/libbpf/issues/425

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
---
 tools/lib/bpf/bpf_tracing.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index cf980e54d331..7ad9cdea99e1 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -461,4 +461,37 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)

+#define ___bpf_syscall_args0()           ctx
+#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * tracing syscall functions, like __x64_sys_close. It hides the underlying
+ * platform-specific low-level way of getting syscall input arguments from
+ * struct pt_regs, and provides a familiar typed and named function arguments
+ * syntax and semantics of accessing syscall input parameters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE_SYSCALL(name, args...)				    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);		    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_syscall_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
 #endif
--
2.30.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v3 2/2] selftests/bpf: Test BPF_KPROBE_SYSCALL macro
  2022-02-07 14:31 [PATCH bpf-next v3 0/2] libbpf: Add syscall-specific variant of BPF_KPROBE Hengqi Chen
  2022-02-07 14:31 ` [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro Hengqi Chen
@ 2022-02-07 14:31 ` Hengqi Chen
  1 sibling, 0 replies; 8+ messages in thread
From: Hengqi Chen @ 2022-02-07 14:31 UTC (permalink / raw)
  To: bpf; +Cc: andrii, hengqi.chen

Add tests for the newly added BPF_KPROBE_SYSCALL macro.

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
---
 .../bpf/prog_tests/test_bpf_syscall_macro.c   |  6 +++++
 .../selftests/bpf/progs/bpf_syscall_macro.c   | 23 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
index f5f4c8adf539..87d05c8a7a4a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -58,6 +58,12 @@ void test_bpf_syscall_macro(void)
 	ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
 	ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");

+	ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option");
+	ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2");
+	ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3");
+	ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
+	ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
+
 cleanup:
 	bpf_syscall_macro__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index e7c622cb6a39..c1481fcbfacb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -21,6 +21,12 @@ unsigned long arg4_core_cx = 0;
 unsigned long arg4_core = 0;
 unsigned long arg5_core = 0;

+int option_syscall = 0;
+unsigned long arg2_syscall = 0;
+unsigned long arg3_syscall = 0;
+unsigned long arg4_syscall = 0;
+unsigned long arg5_syscall = 0;
+
 const volatile pid_t filter_pid = 0;

 SEC("kprobe/" SYS_PREFIX "sys_prctl")
@@ -56,4 +62,21 @@ int BPF_KPROBE(handle_sys_prctl)
 	return 0;
 }

+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2,
+		       unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != filter_pid)
+		return 0;
+
+	option_syscall = option;
+	arg2_syscall = arg2;
+	arg3_syscall = arg3;
+	arg4_syscall = arg4;
+	arg5_syscall = arg5;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
--
2.30.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-07 14:31 ` [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro Hengqi Chen
@ 2022-02-07 21:58   ` Andrii Nakryiko
  2022-02-09  5:53     ` Andrii Nakryiko
  0 siblings, 1 reply; 8+ messages in thread
From: Andrii Nakryiko @ 2022-02-07 21:58 UTC (permalink / raw)
  To: Hengqi Chen; +Cc: bpf, Andrii Nakryiko

On Mon, Feb 7, 2022 at 6:31 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
>
> Add syscall-specific variant of BPF_KPROBE named BPF_KPROBE_SYSCALL ([0]).
> The new macro hides the underlying way of getting syscall input arguments.
> With the new macro, the following code:
>
>     SEC("kprobe/__x64_sys_close")
>     int BPF_KPROBE(do_sys_close, struct pt_regs *regs)
>     {
>         int fd;
>
>         fd = PT_REGS_PARM1_CORE(regs);
>         /* do something with fd */
>     }
>
> can be written as:
>
>     SEC("kprobe/__x64_sys_close")
>     int BPF_KPROBE_SYSCALL(do_sys_close, int fd)
>     {
>         /* do something with fd */
>     }
>
>   [0] Closes: https://github.com/libbpf/libbpf/issues/425
>
> Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
> ---
>  tools/lib/bpf/bpf_tracing.h | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
>
> diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
> index cf980e54d331..7ad9cdea99e1 100644
> --- a/tools/lib/bpf/bpf_tracing.h
> +++ b/tools/lib/bpf/bpf_tracing.h
> @@ -461,4 +461,37 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
>  }                                                                          \
>  static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
>
> +#define ___bpf_syscall_args0()           ctx
> +#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
> +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
> +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
> +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
> +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
> +#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
> +
> +/*
> + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
> + * tracing syscall functions, like __x64_sys_close. It hides the underlying
> + * platform-specific low-level way of getting syscall input arguments from
> + * struct pt_regs, and provides a familiar typed and named function arguments
> + * syntax and semantics of accessing syscall input parameters.
> + *
> + * Original struct pt_regs* context is preserved as 'ctx' argument. This might
> + * be necessary when using BPF helpers like bpf_perf_event_output().
> + */

LGTM. Please also mention that this macro relies on CO-RE so that
users are aware.

Unfortunately we had to back out Ilya's patches with
PT_REGS_SYSCALL_REGS() and PT_REGS_PARMx_CORE_SYSCALL(), so we'll need
to wait a bit before merging this.


> +#define BPF_KPROBE_SYSCALL(name, args...)                                  \
> +name(struct pt_regs *ctx);                                                 \
> +static __attribute__((always_inline)) typeof(name(0))                      \
> +____##name(struct pt_regs *ctx, ##args);                                   \
> +typeof(name(0)) name(struct pt_regs *ctx)                                  \
> +{                                                                          \
> +       struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);                   \
> +       _Pragma("GCC diagnostic push")                                      \
> +       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
> +       return ____##name(___bpf_syscall_args(args));                       \
> +       _Pragma("GCC diagnostic pop")                                       \
> +}                                                                          \
> +static __attribute__((always_inline)) typeof(name(0))                      \
> +____##name(struct pt_regs *ctx, ##args)
> +
>  #endif
> --
> 2.30.2

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-07 21:58   ` Andrii Nakryiko
@ 2022-02-09  5:53     ` Andrii Nakryiko
  2022-02-09 16:38       ` Alexei Starovoitov
  2022-02-10 16:52       ` Hengqi Chen
  0 siblings, 2 replies; 8+ messages in thread
From: Andrii Nakryiko @ 2022-02-09  5:53 UTC (permalink / raw)
  To: Hengqi Chen; +Cc: bpf, Andrii Nakryiko

On Mon, Feb 7, 2022 at 1:58 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Feb 7, 2022 at 6:31 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
> >
> > Add syscall-specific variant of BPF_KPROBE named BPF_KPROBE_SYSCALL ([0]).
> > The new macro hides the underlying way of getting syscall input arguments.
> > With the new macro, the following code:
> >
> >     SEC("kprobe/__x64_sys_close")
> >     int BPF_KPROBE(do_sys_close, struct pt_regs *regs)
> >     {
> >         int fd;
> >
> >         fd = PT_REGS_PARM1_CORE(regs);
> >         /* do something with fd */
> >     }
> >
> > can be written as:
> >
> >     SEC("kprobe/__x64_sys_close")
> >     int BPF_KPROBE_SYSCALL(do_sys_close, int fd)
> >     {
> >         /* do something with fd */
> >     }
> >
> >   [0] Closes: https://github.com/libbpf/libbpf/issues/425
> >
> > Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
> > ---
> >  tools/lib/bpf/bpf_tracing.h | 33 +++++++++++++++++++++++++++++++++
> >  1 file changed, 33 insertions(+)
> >
> > diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
> > index cf980e54d331..7ad9cdea99e1 100644
> > --- a/tools/lib/bpf/bpf_tracing.h
> > +++ b/tools/lib/bpf/bpf_tracing.h
> > @@ -461,4 +461,37 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
> >  }                                                                          \
> >  static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
> >
> > +#define ___bpf_syscall_args0()           ctx
> > +#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
> > +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
> > +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
> > +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
> > +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
> > +#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
> > +
> > +/*
> > + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
> > + * tracing syscall functions, like __x64_sys_close. It hides the underlying
> > + * platform-specific low-level way of getting syscall input arguments from
> > + * struct pt_regs, and provides a familiar typed and named function arguments
> > + * syntax and semantics of accessing syscall input parameters.
> > + *
> > + * Original struct pt_regs* context is preserved as 'ctx' argument. This might
> > + * be necessary when using BPF helpers like bpf_perf_event_output().
> > + */
>
> LGTM. Please also mention that this macro relies on CO-RE so that
> users are aware.
>

Now that Ilya's fixes are in again, added a small note about reliance
on BPF CO-RE and pushed to bpf-next, thanks.


On a relevant note. The whole __x64_sys_close vs sys_close depending
on architecture and kernel version was always super annoying. BCC
makes this transparent to users (AFAIK) and it always bothered me a
little, but I didn't see a clean solution that fits libbpf.

I think I finally found it, though. Instead of guessing whether the
kprobe function is a syscall or not based on "sys_" prefix of a kernel
function, we can use libbpf SEC() handling to do this transparently.
What if we define two new SEC() definitions:

SEC("ksyscall/write") and SEC("kretsyscall/write") (or maybe
SEC("kprobe.syscall/write") and SEC("kretprobe.syscall/write"), not
sure which one is better, voice your opinion, please). And for such
special kprobes, libbpf will perform feature detection of this
ARCH_SYSCALL_WRAPPER (we'll need to see the best way to do this in a
simple and fast way, preferably without parsing kallsyms) and
depending on it substitute either sys_write (or should it be
__se_sys_write, according to Naveen) or __<arch>_sys_write. You get
the idea.

I like that this is still explicit and in the spirit of libbpf, but
offloads the burden of knowing these intricate differences from users.

Thoughts?


> Unfortunately we had to back out Ilya's patches with
> PT_REGS_SYSCALL_REGS() and PT_REGS_PARMx_CORE_SYSCALL(), so we'll need
> to wait a bit before merging this.
>
>
> > +#define BPF_KPROBE_SYSCALL(name, args...)                                  \
> > +name(struct pt_regs *ctx);                                                 \
> > +static __attribute__((always_inline)) typeof(name(0))                      \
> > +____##name(struct pt_regs *ctx, ##args);                                   \
> > +typeof(name(0)) name(struct pt_regs *ctx)                                  \
> > +{                                                                          \
> > +       struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);                   \
> > +       _Pragma("GCC diagnostic push")                                      \
> > +       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
> > +       return ____##name(___bpf_syscall_args(args));                       \
> > +       _Pragma("GCC diagnostic pop")                                       \
> > +}                                                                          \
> > +static __attribute__((always_inline)) typeof(name(0))                      \
> > +____##name(struct pt_regs *ctx, ##args)
> > +
> >  #endif
> > --
> > 2.30.2

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-09  5:53     ` Andrii Nakryiko
@ 2022-02-09 16:38       ` Alexei Starovoitov
  2022-02-09 17:47         ` Andrii Nakryiko
  2022-02-10 16:52       ` Hengqi Chen
  1 sibling, 1 reply; 8+ messages in thread
From: Alexei Starovoitov @ 2022-02-09 16:38 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Hengqi Chen, bpf, Andrii Nakryiko

On Wed, Feb 9, 2022 at 2:25 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Feb 7, 2022 at 1:58 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Feb 7, 2022 at 6:31 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
> > >
> > > Add syscall-specific variant of BPF_KPROBE named BPF_KPROBE_SYSCALL ([0]).
> > > The new macro hides the underlying way of getting syscall input arguments.
> > > With the new macro, the following code:
> > >
> > >     SEC("kprobe/__x64_sys_close")
> > >     int BPF_KPROBE(do_sys_close, struct pt_regs *regs)
> > >     {
> > >         int fd;
> > >
> > >         fd = PT_REGS_PARM1_CORE(regs);
> > >         /* do something with fd */
> > >     }
> > >
> > > can be written as:
> > >
> > >     SEC("kprobe/__x64_sys_close")
> > >     int BPF_KPROBE_SYSCALL(do_sys_close, int fd)
> > >     {
> > >         /* do something with fd */
> > >     }
> > >
> > >   [0] Closes: https://github.com/libbpf/libbpf/issues/425
> > >
> > > Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
> > > ---
> > >  tools/lib/bpf/bpf_tracing.h | 33 +++++++++++++++++++++++++++++++++
> > >  1 file changed, 33 insertions(+)
> > >
> > > diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
> > > index cf980e54d331..7ad9cdea99e1 100644
> > > --- a/tools/lib/bpf/bpf_tracing.h
> > > +++ b/tools/lib/bpf/bpf_tracing.h
> > > @@ -461,4 +461,37 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
> > >  }                                                                          \
> > >  static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
> > >
> > > +#define ___bpf_syscall_args0()           ctx
> > > +#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
> > > +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
> > > +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
> > > +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
> > > +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
> > > +#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
> > > +
> > > +/*
> > > + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
> > > + * tracing syscall functions, like __x64_sys_close. It hides the underlying
> > > + * platform-specific low-level way of getting syscall input arguments from
> > > + * struct pt_regs, and provides a familiar typed and named function arguments
> > > + * syntax and semantics of accessing syscall input parameters.
> > > + *
> > > + * Original struct pt_regs* context is preserved as 'ctx' argument. This might
> > > + * be necessary when using BPF helpers like bpf_perf_event_output().
> > > + */
> >
> > LGTM. Please also mention that this macro relies on CO-RE so that
> > users are aware.
> >
>
> Now that Ilya's fixes are in again, added a small note about reliance
> on BPF CO-RE and pushed to bpf-next, thanks.
>
>
> On a relevant note. The whole __x64_sys_close vs sys_close depending
> on architecture and kernel version was always super annoying. BCC
> makes this transparent to users (AFAIK) and it always bothered me a
> little, but I didn't see a clean solution that fits libbpf.
>
> I think I finally found it, though. Instead of guessing whether the
> kprobe function is a syscall or not based on "sys_" prefix of a kernel
> function, we can use libbpf SEC() handling to do this transparently.
> What if we define two new SEC() definitions:
>
> SEC("ksyscall/write") and SEC("kretsyscall/write") (or maybe
> SEC("kprobe.syscall/write") and SEC("kretprobe.syscall/write"), not
> sure which one is better, voice your opinion, please). And for such
> special kprobes, libbpf will perform feature detection of this
> ARCH_SYSCALL_WRAPPER (we'll need to see the best way to do this in a
> simple and fast way, preferably without parsing kallsyms) and
> depending on it substitute either sys_write (or should it be
> __se_sys_write, according to Naveen) or __<arch>_sys_write. You get
> the idea.
>
> I like that this is still explicit and in the spirit of libbpf, but
> offloads the burden of knowing these intricate differences from users.
>
> Thoughts?

I think it will be just as fragile.
That syscall prefix was changed by the kernel few times now.
libbpf will be chasing the moving target.
I think keeping the magic in .h is simpler and less of a maintenance burden.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-09 16:38       ` Alexei Starovoitov
@ 2022-02-09 17:47         ` Andrii Nakryiko
  0 siblings, 0 replies; 8+ messages in thread
From: Andrii Nakryiko @ 2022-02-09 17:47 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: Hengqi Chen, bpf, Andrii Nakryiko

On Wed, Feb 9, 2022 at 8:38 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Feb 9, 2022 at 2:25 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Mon, Feb 7, 2022 at 1:58 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Mon, Feb 7, 2022 at 6:31 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
> > > >
> > > > Add syscall-specific variant of BPF_KPROBE named BPF_KPROBE_SYSCALL ([0]).
> > > > The new macro hides the underlying way of getting syscall input arguments.
> > > > With the new macro, the following code:
> > > >
> > > >     SEC("kprobe/__x64_sys_close")
> > > >     int BPF_KPROBE(do_sys_close, struct pt_regs *regs)
> > > >     {
> > > >         int fd;
> > > >
> > > >         fd = PT_REGS_PARM1_CORE(regs);
> > > >         /* do something with fd */
> > > >     }
> > > >
> > > > can be written as:
> > > >
> > > >     SEC("kprobe/__x64_sys_close")
> > > >     int BPF_KPROBE_SYSCALL(do_sys_close, int fd)
> > > >     {
> > > >         /* do something with fd */
> > > >     }
> > > >
> > > >   [0] Closes: https://github.com/libbpf/libbpf/issues/425
> > > >
> > > > Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
> > > > ---
> > > >  tools/lib/bpf/bpf_tracing.h | 33 +++++++++++++++++++++++++++++++++
> > > >  1 file changed, 33 insertions(+)
> > > >
> > > > diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
> > > > index cf980e54d331..7ad9cdea99e1 100644
> > > > --- a/tools/lib/bpf/bpf_tracing.h
> > > > +++ b/tools/lib/bpf/bpf_tracing.h
> > > > @@ -461,4 +461,37 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
> > > >  }                                                                          \
> > > >  static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
> > > >
> > > > +#define ___bpf_syscall_args0()           ctx
> > > > +#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
> > > > +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
> > > > +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
> > > > +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
> > > > +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
> > > > +#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
> > > > +
> > > > +/*
> > > > + * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
> > > > + * tracing syscall functions, like __x64_sys_close. It hides the underlying
> > > > + * platform-specific low-level way of getting syscall input arguments from
> > > > + * struct pt_regs, and provides a familiar typed and named function arguments
> > > > + * syntax and semantics of accessing syscall input parameters.
> > > > + *
> > > > + * Original struct pt_regs* context is preserved as 'ctx' argument. This might
> > > > + * be necessary when using BPF helpers like bpf_perf_event_output().
> > > > + */
> > >
> > > LGTM. Please also mention that this macro relies on CO-RE so that
> > > users are aware.
> > >
> >
> > Now that Ilya's fixes are in again, added a small note about reliance
> > on BPF CO-RE and pushed to bpf-next, thanks.
> >
> >
> > On a relevant note. The whole __x64_sys_close vs sys_close depending
> > on architecture and kernel version was always super annoying. BCC
> > makes this transparent to users (AFAIK) and it always bothered me a
> > little, but I didn't see a clean solution that fits libbpf.
> >
> > I think I finally found it, though. Instead of guessing whether the
> > kprobe function is a syscall or not based on "sys_" prefix of a kernel
> > function, we can use libbpf SEC() handling to do this transparently.
> > What if we define two new SEC() definitions:
> >
> > SEC("ksyscall/write") and SEC("kretsyscall/write") (or maybe
> > SEC("kprobe.syscall/write") and SEC("kretprobe.syscall/write"), not
> > sure which one is better, voice your opinion, please). And for such
> > special kprobes, libbpf will perform feature detection of this
> > ARCH_SYSCALL_WRAPPER (we'll need to see the best way to do this in a
> > simple and fast way, preferably without parsing kallsyms) and
> > depending on it substitute either sys_write (or should it be
> > __se_sys_write, according to Naveen) or __<arch>_sys_write. You get
> > the idea.
> >
> > I like that this is still explicit and in the spirit of libbpf, but
> > offloads the burden of knowing these intricate differences from users.
> >
> > Thoughts?
>
> I think it will be just as fragile.
> That syscall prefix was changed by the kernel few times now.
> libbpf will be chasing the moving target.
> I think keeping the magic in .h is simpler and less of a maintenance burden.

Absolutely it is simpler, but it only works for selftests (and even
then, when prefix changes again we'll need to adjust selftests). But
the point here is to not require end users to chase this instead. And
in the real world where users want to work on as wide a range of
kernel versions as possible SYS_PREFIX trick doesn't work (which is
why I didn't want to add that to bpf_tracing.h).

It's cheaper (maintenance-wise) to do it in libbpf than require all
the kprobe users to keep track of this, no? And good thing is that
libbpf is part of the kernel repo, so whenever someone changes this in
the kernel we'll get a failing selftest on next net-next -> bpf->next
sync (at least for architectures that we do test), so we'll be able to
fix and adjust quickly.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro
  2022-02-09  5:53     ` Andrii Nakryiko
  2022-02-09 16:38       ` Alexei Starovoitov
@ 2022-02-10 16:52       ` Hengqi Chen
  1 sibling, 0 replies; 8+ messages in thread
From: Hengqi Chen @ 2022-02-10 16:52 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: bpf, Andrii Nakryiko



On 2/9/22 1:53 PM, Andrii Nakryiko wrote:
> On Mon, Feb 7, 2022 at 1:58 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>>
>> On Mon, Feb 7, 2022 at 6:31 AM Hengqi Chen <hengqi.chen@gmail.com> wrote:
>>>

...

>>
> 
> Now that Ilya's fixes are in again, added a small note about reliance
> on BPF CO-RE and pushed to bpf-next, thanks.
> 
> 

Thanks.

> On a relevant note. The whole __x64_sys_close vs sys_close depending
> on architecture and kernel version was always super annoying. BCC
> makes this transparent to users (AFAIK) and it always bothered me a

BCC does this by 'guessing' the syscall prefix ([0]).

  [0]: https://github.com/iovisor/bcc/blob/v0.24.0/src/python/bcc/__init__.py#L787-L794

> little, but I didn't see a clean solution that fits libbpf.
> 
> I think I finally found it, though. Instead of guessing whether the
> kprobe function is a syscall or not based on "sys_" prefix of a kernel
> function, we can use libbpf SEC() handling to do this transparently.
> What if we define two new SEC() definitions:
> 
> SEC("ksyscall/write") and SEC("kretsyscall/write") (or maybe
> SEC("kprobe.syscall/write") and SEC("kretprobe.syscall/write"), not
> sure which one is better, voice your opinion, please). And for such

ksyscall/kretsyscall is more succinct and in the same vein as kfunc/kretfunc.

> special kprobes, libbpf will perform feature detection of this
> ARCH_SYSCALL_WRAPPER (we'll need to see the best way to do this in a
> simple and fast way, preferably without parsing kallsyms) and

By detecting special structs like struct cpuinfo_x86/cpuinfo_arm64 in BTF ?

In a word, I like the idea and it would make libbpf-based tools more portable. :)

> depending on it substitute either sys_write (or should it be
> __se_sys_write, according to Naveen) or __<arch>_sys_write. You get
> the idea.
> 
> I like that this is still explicit and in the spirit of libbpf, but
> offloads the burden of knowing these intricate differences from users.
> 
> Thoughts?
> 
> 
>> Unfortunately we had to back out Ilya's patches with
>> PT_REGS_SYSCALL_REGS() and PT_REGS_PARMx_CORE_SYSCALL(), so we'll need
>> to wait a bit before merging this.
>>
>>
>>> +#define BPF_KPROBE_SYSCALL(name, args...)                                  \
>>> +name(struct pt_regs *ctx);                                                 \
>>> +static __attribute__((always_inline)) typeof(name(0))                      \
>>> +____##name(struct pt_regs *ctx, ##args);                                   \
>>> +typeof(name(0)) name(struct pt_regs *ctx)                                  \
>>> +{                                                                          \
>>> +       struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);                   \
>>> +       _Pragma("GCC diagnostic push")                                      \
>>> +       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")              \
>>> +       return ____##name(___bpf_syscall_args(args));                       \
>>> +       _Pragma("GCC diagnostic pop")                                       \
>>> +}                                                                          \
>>> +static __attribute__((always_inline)) typeof(name(0))                      \
>>> +____##name(struct pt_regs *ctx, ##args)
>>> +
>>>  #endif
>>> --
>>> 2.30.2

--
Hengqi

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-02-10 16:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-07 14:31 [PATCH bpf-next v3 0/2] libbpf: Add syscall-specific variant of BPF_KPROBE Hengqi Chen
2022-02-07 14:31 ` [PATCH bpf-next v3 1/2] libbpf: Add BPF_KPROBE_SYSCALL macro Hengqi Chen
2022-02-07 21:58   ` Andrii Nakryiko
2022-02-09  5:53     ` Andrii Nakryiko
2022-02-09 16:38       ` Alexei Starovoitov
2022-02-09 17:47         ` Andrii Nakryiko
2022-02-10 16:52       ` Hengqi Chen
2022-02-07 14:31 ` [PATCH bpf-next v3 2/2] selftests/bpf: Test " Hengqi Chen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.