bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments
@ 2021-03-18  6:25 Rafael David Tinoco
  2021-03-18 19:31 ` [PATCH] libbpf: allow bpf object kern_version to be overridden Rafael David Tinoco
  2021-03-19  4:51 ` [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Andrii Nakryiko
  0 siblings, 2 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-03-18  6:25 UTC (permalink / raw)
  To: andrii.nakryiko; +Cc: rafaeldtinoco, bpf

 * Request for comments version (still needs polishing).
 * Based on Andrii Nakryiko's suggestion.
 * Using bpf_program__set_priv in attach_kprobe() for kprobe cleanup.

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
---
 src/libbpf.c | 100 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/src/libbpf.c b/src/libbpf.c
index 2f351d3..4dc09d3 100644
--- a/src/libbpf.c
+++ b/src/libbpf.c
@@ -9677,8 +9677,14 @@ static int parse_uint_from_file(const char *file, const char *fmt)
 
 static int determine_kprobe_perf_type(void)
 {
+	int ret = 0;
+	struct stat s;
 	const char *file = "/sys/bus/event_source/devices/kprobe/type";
 
+	ret = stat(file, &s);
+	if (ret < 0)
+		return -errno;
+
 	return parse_uint_from_file(file, "%d\n");
 }
 
@@ -9703,25 +9709,87 @@ static int determine_uprobe_retprobe_bit(void)
 	return parse_uint_from_file(file, "config:%d\n");
 }
 
+static int determine_kprobe_perf_type_legacy(const char *func_name)
+{
+	char file[256];
+	const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
+
+	snprintf(file, sizeof(file), fname, func_name);
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int poke_kprobe_events(bool add, const char *name, bool kretprobe)
+{
+	int fd, ret = 0;
+	char given[256], buf[256];
+	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
+
+	if (kretprobe && add)
+		snprintf(given, sizeof(given), "kprobes/%s_ret", name);
+	else
+		snprintf(given, sizeof(given), "kprobes/%s", name);
+	if (add)
+		snprintf(buf, sizeof(buf),"%c:%s %s\n", kretprobe ? 'r' : 'p', given, name);
+	else
+		snprintf(buf, sizeof(buf), "-:%s\n", given);
+
+	fd = open(file, O_WRONLY|O_APPEND, 0);
+	if (!fd)
+		return -errno;
+	ret = write(fd, buf, strlen(buf));
+	if (ret < 0) {
+		ret = -errno;
+	}
+	close(fd);
+
+	return ret;
+}
+
+static inline int add_kprobe_event_legacy(const char* func_name, bool kretprobe)
+{
+	return poke_kprobe_events(true /*add*/, func_name, kretprobe);
+}
+
+static inline int remove_kprobe_event_legacy(const char* func_name, bool kretprobe)
+{
+	return poke_kprobe_events(false /*remove*/, func_name, kretprobe);
+}
+
 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 				 uint64_t offset, int pid)
 {
 	struct perf_event_attr attr = {};
 	char errmsg[STRERR_BUFSIZE];
 	int type, pfd, err;
+	bool legacy = false;
 
 	type = uprobe ? determine_uprobe_perf_type()
 		      : determine_kprobe_perf_type();
 	if (type < 0) {
-		pr_warn("failed to determine %s perf type: %s\n",
-			uprobe ? "uprobe" : "kprobe",
-			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
-		return type;
+		if (uprobe) {
+			pr_warn("failed to determine %s perf type: %s\n",
+				uprobe ? "uprobe" : "kprobe",
+				libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+			return type;
+		}
+		err = add_kprobe_event_legacy(name, retprobe);
+		if (err < 0) {
+			pr_warn("failed to add legacy kprobe events: %s\n",
+				libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+			return err;
+		}
+		type = uprobe ? type : determine_kprobe_perf_type_legacy(name);
+		if (type < 0) {
+			remove_kprobe_event_legacy(name, retprobe);
+			pr_warn("failed to determine kprobe perf type: %s\n",
+				libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		}
+		legacy = true;
 	}
-	if (retprobe) {
+	if (retprobe && !legacy) {
 		int bit = uprobe ? determine_uprobe_retprobe_bit()
 				 : determine_kprobe_retprobe_bit();
-
 		if (bit < 0) {
 			pr_warn("failed to determine %s retprobe bit: %s\n",
 				uprobe ? "uprobe" : "kprobe",
@@ -9731,10 +9799,14 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 		attr.config |= 1 << bit;
 	}
 	attr.size = sizeof(attr);
-	attr.type = type;
-	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
-	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
-
+	if (!legacy) {
+		attr.type = type;
+		attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
+		attr.config2 = offset;		 /* kprobe_addr or probe_offset */
+	} else {
+		attr.config = type;
+		attr.type = PERF_TYPE_TRACEPOINT;
+	}
 	/* pid filter is meaningful only for uprobes */
 	pfd = syscall(__NR_perf_event_open, &attr,
 		      pid < 0 ? -1 : pid /* pid */,
@@ -9750,6 +9822,11 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
+void bpf_program__detach_kprobe_legacy(struct bpf_program *prog, void *retprobe)
+{
+	remove_kprobe_event_legacy(prog->name, (bool) retprobe);
+}
+
 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 					    bool retprobe,
 					    const char *func_name)
@@ -9766,6 +9843,9 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return ERR_PTR(pfd);
 	}
+
+	bpf_program__set_priv(prog, (void *) retprobe, bpf_program__detach_kprobe_legacy);
+
 	link = bpf_program__attach_perf_event(prog, pfd);
 	if (IS_ERR(link)) {
 		close(pfd);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH] libbpf: allow bpf object kern_version to be overridden
  2021-03-18  6:25 [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Rafael David Tinoco
@ 2021-03-18 19:31 ` Rafael David Tinoco
  2021-03-18 19:46   ` Andrii Nakryiko
  2021-03-19  4:51 ` [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Andrii Nakryiko
  1 sibling, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-03-18 19:31 UTC (permalink / raw)
  To: rafaeldtinoco; +Cc: andrii.nakryiko, bpf

Unfortunately some distros don't have their accurate kernel version
defined correctly in version.h due to long term support decisions. This
makes LINUX_VERSION_CODE to be defined as the original upstream version
in header, while the running in-kernel version is different.

Older kernels might still check kern_version during bpf_prog_load(),
making it impossible to load a program that could still be portable.
This patch allows one to override bpf objects kern_version attributes,
so program can bypass this in-kernel check during load.

Example:

A kernel 4.15.0-129.132, for example, might have 4.15.18 version defined
in its headers, which is the kernel it is based on, but have a 4.15.0
version defined within the kernel due to other factors.

$ export LIBBPF_KERN_VERSION=4.15.18
$ sudo -E ./portablebpf -v
...
libbpf: bpf object: kernel_version enforced by env variable: 266002
...

This will also help portable binaries within similar older kernels, as
long as they have their BTF data converted from DWARVES (for example).

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
---
 src/libbpf.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/libbpf.c b/src/libbpf.c
index 4dc09d3..cbc6e61 100644
--- a/src/libbpf.c
+++ b/src/libbpf.c
@@ -708,13 +708,19 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 
 static __u32 get_kernel_version(void)
 {
-	__u32 major, minor, patch;
 	struct utsname info;
+	__u32 major, minor, patch, ver;
+	char *ptr, *e = getenv("LIBBPF_KERN_VERSION");
 
 	uname(&info);
-	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+	ptr = (e != NULL) ? e : (char *) &info.release;
+	if (sscanf(ptr, "%u.%u.%u", &major, &minor, &patch) != 3)
 		return 0;
-	return KERNEL_VERSION(major, minor, patch);
+	ver = KERNEL_VERSION(major, minor, patch);
+	if (e)
+		pr_debug("bpf object: kernel_version enforced by env variable: %u\n", ver);
+
+	return ver;
 }
 
 static const struct btf_member *
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] libbpf: allow bpf object kern_version to be overridden
  2021-03-18 19:31 ` [PATCH] libbpf: allow bpf object kern_version to be overridden Rafael David Tinoco
@ 2021-03-18 19:46   ` Andrii Nakryiko
  2021-03-18 20:56     ` Daniel Borkmann
  0 siblings, 1 reply; 24+ messages in thread
From: Andrii Nakryiko @ 2021-03-18 19:46 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Thu, Mar 18, 2021 at 12:31 PM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
> Unfortunately some distros don't have their accurate kernel version
> defined correctly in version.h due to long term support decisions. This
> makes LINUX_VERSION_CODE to be defined as the original upstream version
> in header, while the running in-kernel version is different.
>
> Older kernels might still check kern_version during bpf_prog_load(),
> making it impossible to load a program that could still be portable.
> This patch allows one to override bpf objects kern_version attributes,
> so program can bypass this in-kernel check during load.
>
> Example:
>
> A kernel 4.15.0-129.132, for example, might have 4.15.18 version defined
> in its headers, which is the kernel it is based on, but have a 4.15.0
> version defined within the kernel due to other factors.
>
> $ export LIBBPF_KERN_VERSION=4.15.18
> $ sudo -E ./portablebpf -v
> ...
> libbpf: bpf object: kernel_version enforced by env variable: 266002
> ...
>
> This will also help portable binaries within similar older kernels, as
> long as they have their BTF data converted from DWARVES (for example).
>
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
> ---

Libbpf currently provides a way to specify custom kernel version using
a special "version" ELF section:

int _version SEC("version") = 123;

But it seems like you would want to set it at runtime, so this
compile-time approach might be problematic. But instead of hijacking
get_kernel_version(), it's better to add a simple setter counterpart
to bpf_object__kversion() that would just override obj->kern_version.

>  src/libbpf.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/src/libbpf.c b/src/libbpf.c
> index 4dc09d3..cbc6e61 100644
> --- a/src/libbpf.c
> +++ b/src/libbpf.c
> @@ -708,13 +708,19 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
>
>  static __u32 get_kernel_version(void)
>  {
> -       __u32 major, minor, patch;
>         struct utsname info;
> +       __u32 major, minor, patch, ver;
> +       char *ptr, *e = getenv("LIBBPF_KERN_VERSION");
>
>         uname(&info);
> -       if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
> +       ptr = (e != NULL) ? e : (char *) &info.release;
> +       if (sscanf(ptr, "%u.%u.%u", &major, &minor, &patch) != 3)
>                 return 0;
> -       return KERNEL_VERSION(major, minor, patch);
> +       ver = KERNEL_VERSION(major, minor, patch);
> +       if (e)
> +               pr_debug("bpf object: kernel_version enforced by env variable: %u\n", ver);
> +
> +       return ver;
>  }
>
>  static const struct btf_member *
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libbpf: allow bpf object kern_version to be overridden
  2021-03-18 19:46   ` Andrii Nakryiko
@ 2021-03-18 20:56     ` Daniel Borkmann
  2021-03-19  4:38       ` Rafael David Tinoco
  0 siblings, 1 reply; 24+ messages in thread
From: Daniel Borkmann @ 2021-03-18 20:56 UTC (permalink / raw)
  To: Andrii Nakryiko, Rafael David Tinoco; +Cc: bpf

On 3/18/21 8:46 PM, Andrii Nakryiko wrote:
> On Thu, Mar 18, 2021 at 12:31 PM Rafael David Tinoco
> <rafaeldtinoco@ubuntu.com> wrote:
>>
>> Unfortunately some distros don't have their accurate kernel version
>> defined correctly in version.h due to long term support decisions. This
>> makes LINUX_VERSION_CODE to be defined as the original upstream version
>> in header, while the running in-kernel version is different.
>>
>> Older kernels might still check kern_version during bpf_prog_load(),
>> making it impossible to load a program that could still be portable.
>> This patch allows one to override bpf objects kern_version attributes,
>> so program can bypass this in-kernel check during load.
>>
>> Example:
>>
>> A kernel 4.15.0-129.132, for example, might have 4.15.18 version defined
>> in its headers, which is the kernel it is based on, but have a 4.15.0
>> version defined within the kernel due to other factors.
>>
>> $ export LIBBPF_KERN_VERSION=4.15.18
>> $ sudo -E ./portablebpf -v
>> ...
>> libbpf: bpf object: kernel_version enforced by env variable: 266002
>> ...
>>
>> This will also help portable binaries within similar older kernels, as
>> long as they have their BTF data converted from DWARVES (for example).
>>
>> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
>> ---
> 
> Libbpf currently provides a way to specify custom kernel version using
> a special "version" ELF section:
> 
> int _version SEC("version") = 123;
> 
> But it seems like you would want to set it at runtime, so this
> compile-time approach might be problematic. But instead of hijacking
> get_kernel_version(), it's better to add a simple setter counterpart
> to bpf_object__kversion() that would just override obj->kern_version.

+1, agree, setter makes sense for old kernels, so the loader app can set/
retrieve from wherever it's needed. In addition, couldn't you also backport
the old commit that ignores the kern_version from kernel side (won't help
existing users, but at least might simplify things once they start upgrade)?

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libbpf: allow bpf object kern_version to be overridden
  2021-03-18 20:56     ` Daniel Borkmann
@ 2021-03-19  4:38       ` Rafael David Tinoco
  0 siblings, 0 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-03-19  4:38 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Andrii Nakryiko, bpf

>>>
>>> This will also help portable binaries within similar older kernels, as
>>> long as they have their BTF data converted from DWARVES (for example).
>>>
>>> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
>>> ---
>> Libbpf currently provides a way to specify custom kernel version using
>> a special "version" ELF section:
>> int _version SEC("version") = 123;
>> But it seems like you would want to set it at runtime, so this
>> compile-time approach might be problematic. But instead of hijacking
>> get_kernel_version(), it's better to add a simple setter counterpart
>> to bpf_object__kversion() that would just override obj->kern_version.
>
> +1, agree, setter makes sense for old kernels, so the loader app can set/
> retrieve from wherever it's needed. In addition, couldn't you also backport
> the old commit that ignores the kern_version from kernel side (won't help
> existing users, but at least might simplify things once they start  
> upgrade)?

Yep :\ that was my initial approach and I went for something simpler.
Will propose something else. Once I finish this, I’ll propose the
back-ports removing the version check to the kernel team. Not having
the attr version check will definitely help this, for now needed,
mitigation to fade away.

Thanks for the reviews, would appreciate comments on:

[RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments

if possible, as both walk together supporting legacy kernels.

tks
-rafaeldtinoco

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments
  2021-03-18  6:25 [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Rafael David Tinoco
  2021-03-18 19:31 ` [PATCH] libbpf: allow bpf object kern_version to be overridden Rafael David Tinoco
@ 2021-03-19  4:51 ` Andrii Nakryiko
  2021-03-22 18:04   ` [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support Rafael David Tinoco
  1 sibling, 1 reply; 24+ messages in thread
From: Andrii Nakryiko @ 2021-03-19  4:51 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Wed, Mar 17, 2021 at 11:25 PM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
>  * Request for comments version (still needs polishing).
>  * Based on Andrii Nakryiko's suggestion.
>  * Using bpf_program__set_priv in attach_kprobe() for kprobe cleanup.

no-no-no, set_priv() is going to be deprecated and removed (see [0]),
and is not the right mechanism here. Detachment should happen in
bpf_link__destroy().

  [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY

>
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
> ---
>  src/libbpf.c | 100 +++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 90 insertions(+), 10 deletions(-)
>
> diff --git a/src/libbpf.c b/src/libbpf.c
> index 2f351d3..4dc09d3 100644
> --- a/src/libbpf.c
> +++ b/src/libbpf.c
> @@ -9677,8 +9677,14 @@ static int parse_uint_from_file(const char *file, const char *fmt)
>
>  static int determine_kprobe_perf_type(void)
>  {
> +       int ret = 0;
> +       struct stat s;
>         const char *file = "/sys/bus/event_source/devices/kprobe/type";
>
> +       ret = stat(file, &s);
> +       if (ret < 0)
> +               return -errno;
> +
>         return parse_uint_from_file(file, "%d\n");
>  }
>
> @@ -9703,25 +9709,87 @@ static int determine_uprobe_retprobe_bit(void)
>         return parse_uint_from_file(file, "config:%d\n");
>  }
>
> +static int determine_kprobe_perf_type_legacy(const char *func_name)
> +{
> +       char file[256];

nit: I suspect 256 is much longer than necessary :)


> +       const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
> +
> +       snprintf(file, sizeof(file), fname, func_name);
> +
> +       return parse_uint_from_file(file, "%d\n");
> +}
> +
> +static int poke_kprobe_events(bool add, const char *name, bool kretprobe)

it's probably a good idea to put a link to [0] somewhere here

  [0] https://www.kernel.org/doc/html/latest/trace/kprobetrace.html

> +{
> +       int fd, ret = 0;
> +       char given[256], buf[256];

nit: given -> event_name, to follow official documentation terminology ?

> +       const char *file = "/sys/kernel/debug/tracing/kprobe_events";
> +
> +       if (kretprobe && add)

what if it's kretprobe removal? shouldn't you generate the same name


> +               snprintf(given, sizeof(given), "kprobes/%s_ret", name);
> +       else
> +               snprintf(given, sizeof(given), "kprobes/%s", name);

BCC includes PID in the name of the probe and "bcc", maybe we should
do something similar?

> +       if (add)
> +               snprintf(buf, sizeof(buf),"%c:%s %s\n", kretprobe ? 'r' : 'p', given, name);
> +       else
> +               snprintf(buf, sizeof(buf), "-:%s\n", given);
> +
> +       fd = open(file, O_WRONLY|O_APPEND, 0);
> +       if (!fd)
> +               return -errno;
> +       ret = write(fd, buf, strlen(buf));
> +       if (ret < 0) {
> +               ret = -errno;
> +       }
> +       close(fd);
> +
> +       return ret;
> +}
> +
> +static inline int add_kprobe_event_legacy(const char* func_name, bool kretprobe)
> +{
> +       return poke_kprobe_events(true /*add*/, func_name, kretprobe);
> +}
> +
> +static inline int remove_kprobe_event_legacy(const char* func_name, bool kretprobe)
> +{
> +       return poke_kprobe_events(false /*remove*/, func_name, kretprobe);
> +}
> +
>  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>                                  uint64_t offset, int pid)
>  {
>         struct perf_event_attr attr = {};
>         char errmsg[STRERR_BUFSIZE];
>         int type, pfd, err;
> +       bool legacy = false;
>
>         type = uprobe ? determine_uprobe_perf_type()
>                       : determine_kprobe_perf_type();
>         if (type < 0) {

I think we should do "feature probing" to decide whether we should go
with legacy or modern kprobes. And just stick to that, reporting any
errors. I'm not a big fan of this generic "let's try X, if it fails
for *whatever* reason, let's try Y", because you 1) can ignore some
serious problem 2) you'll be getting unnecessary warnings in your log

> -               pr_warn("failed to determine %s perf type: %s\n",
> -                       uprobe ? "uprobe" : "kprobe",
> -                       libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> -               return type;
> +               if (uprobe) {
> +                       pr_warn("failed to determine %s perf type: %s\n",
> +                               uprobe ? "uprobe" : "kprobe",
> +                               libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> +                       return type;
> +               }
> +               err = add_kprobe_event_legacy(name, retprobe);
> +               if (err < 0) {
> +                       pr_warn("failed to add legacy kprobe events: %s\n",
> +                               libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +                       return err;
> +               }
> +               type = uprobe ? type : determine_kprobe_perf_type_legacy(name);
> +               if (type < 0) {
> +                       remove_kprobe_event_legacy(name, retprobe);
> +                       pr_warn("failed to determine kprobe perf type: %s\n",
> +                               libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> +               }
> +               legacy = true;
>         }
> -       if (retprobe) {
> +       if (retprobe && !legacy) {
>                 int bit = uprobe ? determine_uprobe_retprobe_bit()
>                                  : determine_kprobe_retprobe_bit();
> -
>                 if (bit < 0) {
>                         pr_warn("failed to determine %s retprobe bit: %s\n",
>                                 uprobe ? "uprobe" : "kprobe",
> @@ -9731,10 +9799,14 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>                 attr.config |= 1 << bit;
>         }
>         attr.size = sizeof(attr);
> -       attr.type = type;
> -       attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
> -       attr.config2 = offset;           /* kprobe_addr or probe_offset */
> -
> +       if (!legacy) {
> +               attr.type = type;
> +               attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
> +               attr.config2 = offset;           /* kprobe_addr or probe_offset */
> +       } else {
> +               attr.config = type;
> +               attr.type = PERF_TYPE_TRACEPOINT;
> +       }
>         /* pid filter is meaningful only for uprobes */
>         pfd = syscall(__NR_perf_event_open, &attr,
>                       pid < 0 ? -1 : pid /* pid */,
> @@ -9750,6 +9822,11 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>         return pfd;
>  }
>
> +void bpf_program__detach_kprobe_legacy(struct bpf_program *prog, void *retprobe)
> +{
> +       remove_kprobe_event_legacy(prog->name, (bool) retprobe);
> +}

as I mentioned, this should be done by bpf_link__destroy()

> +
>  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                                             bool retprobe,
>                                             const char *func_name)
> @@ -9766,6 +9843,9 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
>                 return ERR_PTR(pfd);
>         }
> +
> +       bpf_program__set_priv(prog, (void *) retprobe, bpf_program__detach_kprobe_legacy);
> +
>         link = bpf_program__attach_perf_event(prog, pfd);
>         if (IS_ERR(link)) {
>                 close(pfd);
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-03-19  4:51 ` [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Andrii Nakryiko
@ 2021-03-22 18:04   ` Rafael David Tinoco
  2021-03-22 18:25     ` Rafael David Tinoco
  0 siblings, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-03-22 18:04 UTC (permalink / raw)
  To: andrii.nakryiko; +Cc: bpf, rafaeldtinoco

- This is a RFC (v2).
- Please check my reply with inline comments.
---
 src/libbpf.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 357 insertions(+), 5 deletions(-)

diff --git a/src/libbpf.c b/src/libbpf.c
index 3b1c79f..e9c6025 100644
--- a/src/libbpf.c
+++ b/src/libbpf.c
@@ -9465,6 +9465,10 @@ struct bpf_link {
 	char *pin_path;		/* NULL, if not pinned */
 	int fd;			/* hook FD, -1 if not applicable */
 	bool disconnected;
+	struct {
+		const char *name;
+		bool retprobe;
+	} legacy;
 };
 
 /* Replace link's underlying BPF program with the new one */
@@ -9501,6 +9505,7 @@ int bpf_link__destroy(struct bpf_link *link)
 		link->destroy(link);
 	if (link->pin_path)
 		free(link->pin_path);
+
 	free(link);
 
 	return err;
@@ -9598,6 +9603,8 @@ int bpf_link__unpin(struct bpf_link *link)
 	return 0;
 }
 
+static inline int remove_kprobe_event_legacy(const char*, bool);
+
 static int bpf_link__detach_perf_event(struct bpf_link *link)
 {
 	int err;
@@ -9605,8 +9612,25 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
 	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
 	if (err)
 		err = -errno;
-
 	close(link->fd);
+
+	return err;
+}
+
+static int bpf_link__detach_perf_event_legacy(struct bpf_link *link)
+{
+	int err;
+
+	err = bpf_link__detach_perf_event(link);
+	if (err)
+		err = -errno; // improve this
+
+	/*
+	err = remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
+	if (err)
+		err = -errno;
+	 */
+
 	return err;
 }
 
@@ -9655,6 +9679,48 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_perf_event_legacy(struct bpf_program *prog,
+						       int pfd)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, err;
+
+	if (pfd < 0) {
+		pr_warn("prog '%s': invalid perf event FD %d\n", prog->name, pfd);
+		return ERR_PTR(-EINVAL);
+	}
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", prog->name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+
+	link->detach = &bpf_link__detach_perf_event_legacy;
+	link->fd = pfd;
+
+	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+		err = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to attach to pfd %d: %s\n", prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		if (err == -EPROTO)
+			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd);
+		return ERR_PTR(err);
+	}
+	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		free(link);
+		pr_warn("prog '%s': failed to enable pfd %d: %s\n", prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+
+	return link;
+}
+
 /*
  * this function is expected to parse integer in the range of [0, 2^31-1] from
  * given file using scanf format string fmt. If actual parsed value is
@@ -9685,34 +9751,242 @@ static int parse_uint_from_file(const char *file, const char *fmt)
 	return ret;
 }
 
+static int write_uint_to_file(const char *file, unsigned int val)
+{
+	char buf[STRERR_BUFSIZE];
+	int err;
+	FILE *f;
+
+	f = fopen(file, "w");
+	if (!f) {
+		err = -errno;
+		pr_debug("failed to open '%s': %s\n", file,
+			 libbpf_strerror_r(err, buf, sizeof(buf)));
+		return err;
+	}
+	err = fprintf(f, "%u", val);
+	if (err != 1) {
+		err = -errno;
+		pr_debug("failed to write '%u' to '%s': %s\n", val, file,
+			libbpf_strerror_r(err, buf, sizeof(buf)));
+		fclose(f);
+		return err;
+	}
+	fclose(f);
+	return 0;
+}
+
+#define KPROBE_PERF_TYPE	"/sys/bus/event_source/devices/kprobe/type"
+#define UPROBE_PERF_TYPE	"/sys/bus/event_source/devices/uprobe/type"
+#define KPROBERET_FORMAT	"/sys/bus/event_source/devices/kprobe/format/retprobe"
+#define UPROBERET_FORMAT	"/sys/bus/event_source/devices/uprobe/format/retprobe"
+/* legacy kprobe events related files */
+#define KPROBE_EVENTS		"/sys/kernel/debug/tracing/kprobe_events"
+#define KPROBE_LEG_TOGGLE	"/sys/kernel/debug/kprobes/enabled"
+#define KPROBE_LEG_ALL_TOGGLE	"/sys/kernel/debug/tracing/events/kprobes/enable";
+#define KPROBE_SINGLE_TOGGLE	"/sys/kernel/debug/tracing/events/kprobes/%s/enable";
+#define KPROBE_EVENT_ID		"/sys/kernel/debug/tracing/events/kprobes/%s/id";
+
+static bool determine_kprobe_legacy(void)
+{
+	struct stat s;
+
+	return stat(KPROBE_PERF_TYPE, &s) == 0 ? false : true;
+}
+
 static int determine_kprobe_perf_type(void)
 {
-	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+	const char *file = KPROBE_PERF_TYPE;
 
 	return parse_uint_from_file(file, "%d\n");
 }
 
 static int determine_uprobe_perf_type(void)
 {
-	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+	const char *file = UPROBE_PERF_TYPE;
 
 	return parse_uint_from_file(file, "%d\n");
 }
 
 static int determine_kprobe_retprobe_bit(void)
 {
-	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+	const char *file = KPROBERET_FORMAT;
 
 	return parse_uint_from_file(file, "config:%d\n");
 }
 
 static int determine_uprobe_retprobe_bit(void)
 {
-	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+	const char *file = UPROBERET_FORMAT;
 
 	return parse_uint_from_file(file, "config:%d\n");
 }
 
+static int toggle_kprobe_legacy(bool on)
+{
+	static int refcount;
+	static bool initial, veryfirst;
+	const char *file = KPROBE_LEG_TOGGLE;
+
+	if (on) {
+		refcount++;
+		if (veryfirst)
+			return 0;
+		veryfirst = true;
+		/* initial value for KPROB_LEG_TOGGLE */
+		initial = (bool) parse_uint_from_file(file, "%d\n");
+		return write_uint_to_file(file, 1); /* enable kprobes */
+	}
+	refcount--;
+	printf("DEBUG: kprobe_legacy refcount=%d\n", refcount);
+	if (refcount == 0) {
+		/* off ret value back to initial value if last consumer */
+		return write_uint_to_file(file, initial);
+	}
+	return 0;
+}
+
+static int toggle_kprobe_event_legacy_all(bool on)
+{
+	static int refcount;
+	static bool initial, veryfirst;
+	const char *file = KPROBE_LEG_ALL_TOGGLE;
+
+	if (on) {
+		refcount++;
+		if (veryfirst)
+			return 0;
+		veryfirst = true;
+		// initial value for KPROB_LEG_ALL_TOGGLE
+		initial = (bool) parse_uint_from_file(file, "%d\n");
+		return write_uint_to_file(file, 1); // enable kprobes
+	}
+	refcount--;
+	printf("DEBUG: legacy_all refcount=%d\n", refcount);
+	if (refcount == 0) {
+		// off ret value back to initial value if last consumer
+		return write_uint_to_file(file, initial);
+	}
+	return 0;
+}
+
+static int kprobe_event_normalize(char *newname, size_t size, const char *name, bool retprobe)
+{
+	int ret = 0;
+
+	if (IS_ERR(name))
+		return -1;
+
+	if (retprobe)
+		ret = snprintf(newname, size, "kprobes/%s_ret", name);
+	else
+		ret = snprintf(newname, size, "kprobes/%s", name);
+
+	if (ret <= strlen("kprobes/"))
+		ret = -errno;
+
+	return ret;
+}
+
+static int toggle_single_kprobe_event_legacy(bool on, const char *name, bool retprobe)
+{
+	char probename[32], f[96];
+	const char *file = KPROBE_SINGLE_TOGGLE;
+	int ret;
+
+	ret = kprobe_event_normalize(probename, sizeof(probename), name, retprobe);
+	if (ret < 0)
+		return ret;
+
+	snprintf(f, sizeof(f), file, probename + strlen("kprobes/"));
+
+	printf("DEBUG: writing %u to %s\n", (unsigned int) on, f);
+
+	ret = write_uint_to_file(f, (unsigned int) on);
+
+	return ret;
+}
+
+static int poke_kprobe_events(bool add, const char *name, bool retprobe)
+{
+	int fd, ret = 0;
+	char probename[32], cmd[96];
+	const char *file = KPROBE_EVENTS;
+
+	ret = kprobe_event_normalize(probename, sizeof(probename), name, retprobe);
+	if (ret < 0)
+		return ret;
+
+	if (add)
+		snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p', probename, name);
+	else
+		snprintf(cmd, sizeof(cmd), "-:%s", probename);
+
+	printf("DEBUG: %s\n", cmd);
+
+	fd = open(file, O_WRONLY|O_APPEND, 0);
+	if (!fd)
+		return -errno;
+	ret = write(fd, cmd, strlen(cmd));
+	if (ret < 0)
+		ret = -errno;
+	close(fd);
+
+	return ret;
+}
+
+static inline int add_kprobe_event_legacy(const char* func_name, bool retprobe)
+{
+	int ret = 0;
+
+	ret = poke_kprobe_events(true, func_name, retprobe);
+	if (ret < 0)
+		printf("DEBUG: poke_kprobe_events (on) error\n");
+
+	ret = toggle_kprobe_event_legacy_all(true);
+	if (ret < 0)
+		printf("DEBUG: toggle_kprobe_event_legacy_all (on) error\n");
+
+	ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
+	if (ret < 0)
+		printf("DEBUG: toggle_single_kprobe_event_legacy (on) error\n");
+
+	return ret;
+}
+
+static inline int remove_kprobe_event_legacy(const char* func_name, bool retprobe)
+{
+	int ret = 0;
+
+	ret = toggle_kprobe_event_legacy_all(true);
+	if (ret < 0)
+		printf("DEBUG: toggle_kprobe_event_legacy_all (off) error\n");
+
+	ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
+	if (ret < 0)
+		printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
+
+	ret = toggle_single_kprobe_event_legacy(false, func_name, retprobe);
+	if (ret < 0)
+		printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
+
+	ret = poke_kprobe_events(false, func_name, retprobe);
+	if (ret < 0)
+		printf("DEBUG: poke_kprobe_events (off) error\n");
+
+	return ret;
+}
+
+static int determine_kprobe_perf_type_legacy(const char *func_name)
+{
+	char file[96];
+	const char *fname = KPROBE_EVENT_ID;
+
+	snprintf(file, sizeof(file), fname, func_name);
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 				 uint64_t offset, int pid)
 {
@@ -9760,6 +10034,51 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
+static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
+					uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	if (uprobe) // legacy uprobe not supported yet
+		return -1;
+
+	err = toggle_kprobe_legacy(true);
+	if (err < 0) {
+		pr_warn("failed to toggle kprobe legacy support: %s\n", libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	err = add_kprobe_event_legacy(name, retprobe);
+	if (err < 0) {
+		pr_warn("failed to add legacy kprobe event: %s\n", libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	type = determine_kprobe_perf_type_legacy(name);
+	if (err < 0) {
+		pr_warn("failed to determine legacy kprobe event id: %s\n", libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+
+	attr.size = sizeof(attr);
+	attr.config = type;
+	attr.type = PERF_TYPE_TRACEPOINT;
+
+	pfd = syscall(__NR_perf_event_open,
+		      &attr,
+		      pid < 0 ? -1 : pid,
+		      pid == -1 ? 0 : -1,
+		      -1,
+		      PERF_FLAG_FD_CLOEXEC);
+
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("legacy kprobe perf_event_open() failed: %s\n", libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 					    bool retprobe,
 					    const char *func_name)
@@ -9788,6 +10107,33 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_kprobe_legacy(struct bpf_program *prog,
+						   bool retprobe,
+						   const char *func_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe_legacy(false, retprobe, func_name, 0, -1);
+	if (pfd < 0) {
+		pr_warn("prog '%s': failed to create %s '%s' legacy perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event_legacy(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warn("prog '%s': failed to attach to %s '%s': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	/* needed history for the legacy probe cleanup */
+	link->legacy.name = func_name;
+	link->legacy.retprobe = retprobe;
+
+	return link;
+}
+
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
 				      struct bpf_program *prog)
 {
@@ -9797,6 +10143,9 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
 	func_name = prog->sec_name + sec->len;
 	retprobe = strcmp(sec->sec, "kretprobe/") == 0;
 
+	if(determine_kprobe_legacy())
+		return bpf_program__attach_kprobe_legacy(prog, retprobe, func_name);
+
 	return bpf_program__attach_kprobe(prog, retprobe, func_name);
 }
 
@@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 	free(s->maps);
 	free(s->progs);
 	free(s);
+
+	remove_kprobe_event_legacy("ip_set_create", false);
+	remove_kprobe_event_legacy("ip_set_create", true);
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-03-22 18:04   ` [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support Rafael David Tinoco
@ 2021-03-22 18:25     ` Rafael David Tinoco
  2021-03-26 20:50       ` Andrii Nakryiko
  0 siblings, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-03-22 18:25 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: Andrii Nakryiko, bpf

> - This is a RFC (v2).
> - Please check my reply with inline comments.

Comments bellow… (no correct formatting for now):

> ---
>  src/libbpf.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 357 insertions(+), 5 deletions(-)
>
> diff --git a/src/libbpf.c b/src/libbpf.c
> index 3b1c79f..e9c6025 100644
> --- a/src/libbpf.c
> +++ b/src/libbpf.c
> @@ -9465,6 +9465,10 @@ struct bpf_link {
>  	char *pin_path;		/* NULL, if not pinned */
>  	int fd;			/* hook FD, -1 if not applicable */
>  	bool disconnected;
> +	struct {
> +		const char *name;
> +		bool retprobe;
> +	} legacy;
>  };

For bpf_link->detach() I needed func_name somewhere.

>
> +static inline int remove_kprobe_event_legacy(const char*, bool);
> +
>  static int bpf_link__detach_perf_event(struct bpf_link *link)
>  {
>  	int err;
> @@ -9605,8 +9612,25 @@ static int bpf_link__detach_perf_event(struct  
> bpf_link *link)
>  	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
>  	if (err)
>  		err = -errno;
> -
>  	close(link->fd);
> +
> +	return err;
> +}
> +
> +static int bpf_link__detach_perf_event_legacy(struct bpf_link *link)
> +{
> +	int err;
> +
> +	err = bpf_link__detach_perf_event(link);
> +	if (err)
> +		err = -errno; // improve this
> +
> +	/*
> +	err = remove_kprobe_event_legacy(link->legacy.name,  
> link->legacy.retprobe);
> +	if (err)
> +		err = -errno;
> +	 */
> +
>  	return err;
>  }

Unfortunately I can’t remove kprobe event name from kprobe_events,
even if I unload it (0 >> enabled) before. It won’t work until the
object is fully unloaded. This is why previous version using
bpf_program__set_priv() used to work. I’m showing this bellow…

Check the last lines of this to understand better.

>
> @@ -9655,6 +9679,48 @@ struct bpf_link  
> *bpf_program__attach_perf_event(struct bpf_program *prog,
>  	return link;
>  }
>
> +struct bpf_link *bpf_program__attach_perf_event_legacy(struct  
> bpf_program *prog,
> +						       int pfd)
> +{
> +	char errmsg[STRERR_BUFSIZE];
> +	struct bpf_link *link;
> +	int prog_fd, err;
> +
> +	if (pfd < 0) {
> +		pr_warn("prog '%s': invalid perf event FD %d\n", prog->name, pfd);
> +		return ERR_PTR(-EINVAL);
> +	}
> +	prog_fd = bpf_program__fd(prog);
> +	if (prog_fd < 0) {
> +		pr_warn("prog '%s': can't attach BPF program w/o FD (did  
> you load it?)\n", prog->name);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	link = calloc(1, sizeof(*link));
> +	if (!link)
> +		return ERR_PTR(-ENOMEM);
> +
> +	link->detach = &bpf_link__detach_perf_event_legacy;

I created another function for all existing ones using _legacy at the end.
This one in particular could have a callback function as argument that would
be passed to link->detach().. this way I could avoid having 2 functions  
alike.

> +	link->fd = pfd;
> +
> +	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
> +		err = -errno;
> +		free(link);
> +		pr_warn("prog '%s': failed to attach to pfd %d: %s\n",  
> prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		if (err == -EPROTO)
> +			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN  
> to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name,  
> pfd);
> +		return ERR_PTR(err);
> +	}
> +	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
> +		err = -errno;
> +		free(link);
> +		pr_warn("prog '%s': failed to enable pfd %d: %s\n",  
> prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return ERR_PTR(err);
> +	}
> +
> +	return link;
> +}
> +
>  /*
>   * this function is expected to parse integer in the range of [0, 2^31-1] from
>   * given file using scanf format string fmt. If actual parsed value is
> @@ -9685,34 +9751,242 @@ static int parse_uint_from_file(const char  
> *file, const char *fmt)
>  	return ret;
>  }
>
> +static int write_uint_to_file(const char *file, unsigned int val)
> +{
> +	char buf[STRERR_BUFSIZE];
> +	int err;
> +	FILE *f;
> +
> +	f = fopen(file, "w");
> +	if (!f) {
> +		err = -errno;
> +		pr_debug("failed to open '%s': %s\n", file,
> +			 libbpf_strerror_r(err, buf, sizeof(buf)));
> +		return err;
> +	}
> +	err = fprintf(f, "%u", val);
> +	if (err != 1) {
> +		err = -errno;
> +		pr_debug("failed to write '%u' to '%s': %s\n", val, file,
> +			libbpf_strerror_r(err, buf, sizeof(buf)));
> +		fclose(f);
> +		return err;
> +	}
> +	fclose(f);
> +	return 0;
> +}
> +
> +#define KPROBE_PERF_TYPE	"/sys/bus/event_source/devices/kprobe/type"
> +#define UPROBE_PERF_TYPE	"/sys/bus/event_source/devices/uprobe/type"
> +#define KPROBERET_FORMAT	 
> "/sys/bus/event_source/devices/kprobe/format/retprobe"
> +#define UPROBERET_FORMAT	 
> "/sys/bus/event_source/devices/uprobe/format/retprobe"
> +/* legacy kprobe events related files */
> +#define KPROBE_EVENTS		"/sys/kernel/debug/tracing/kprobe_events"
> +#define KPROBE_LEG_TOGGLE	"/sys/kernel/debug/kprobes/enabled"
> +#define KPROBE_LEG_ALL_TOGGLE	 
> "/sys/kernel/debug/tracing/events/kprobes/enable";
> +#define KPROBE_SINGLE_TOGGLE	 
> "/sys/kernel/debug/tracing/events/kprobes/%s/enable";
> +#define KPROBE_EVENT_ID	"/sys/kernel/debug/tracing/events/kprobes/%s/id";
> +

This made the life easier: to understand which files were related to what

> +static bool determine_kprobe_legacy(void)
> +{
> +	struct stat s;
> +
> +	return stat(KPROBE_PERF_TYPE, &s) == 0 ? false : true;
> +}
> +
>  static int determine_kprobe_perf_type(void)
>  {
> -	const char *file = "/sys/bus/event_source/devices/kprobe/type";
> +	const char *file = KPROBE_PERF_TYPE;
>
>  	return parse_uint_from_file(file, "%d\n");
>  }
>
>  static int determine_uprobe_perf_type(void)
>  {
> -	const char *file = "/sys/bus/event_source/devices/uprobe/type";
> +	const char *file = UPROBE_PERF_TYPE;
>
>  	return parse_uint_from_file(file, "%d\n");
>  }
>
>  static int determine_kprobe_retprobe_bit(void)
>  {
> -	const char *file =  
> "/sys/bus/event_source/devices/kprobe/format/retprobe";
> +	const char *file = KPROBERET_FORMAT;
>
>  	return parse_uint_from_file(file, "config:%d\n");
>  }
>
>  static int determine_uprobe_retprobe_bit(void)
>  {
> -	const char *file =  
> "/sys/bus/event_source/devices/uprobe/format/retprobe";
> +	const char *file = UPROBERET_FORMAT;
>
>  	return parse_uint_from_file(file, "config:%d\n");
>  }
>
> +static int toggle_kprobe_legacy(bool on)
> +{
> +	static int refcount;
> +	static bool initial, veryfirst;
> +	const char *file = KPROBE_LEG_TOGGLE;
> +
> +	if (on) {
> +		refcount++;
> +		if (veryfirst)
> +			return 0;
> +		veryfirst = true;
> +		/* initial value for KPROB_LEG_TOGGLE */
> +		initial = (bool) parse_uint_from_file(file, "%d\n");
> +		return write_uint_to_file(file, 1); /* enable kprobes */
> +	}
> +	refcount--;
> +	printf("DEBUG: kprobe_legacy refcount=%d\n", refcount);
> +	if (refcount == 0) {
> +		/* off ret value back to initial value if last consumer */
> +		return write_uint_to_file(file, initial);
> +	}
> +	return 0;
> +}
> +
> +static int toggle_kprobe_event_legacy_all(bool on)
> +{
> +	static int refcount;
> +	static bool initial, veryfirst;
> +	const char *file = KPROBE_LEG_ALL_TOGGLE;
> +
> +	if (on) {
> +		refcount++;
> +		if (veryfirst)
> +			return 0;
> +		veryfirst = true;
> +		// initial value for KPROB_LEG_ALL_TOGGLE
> +		initial = (bool) parse_uint_from_file(file, "%d\n");
> +		return write_uint_to_file(file, 1); // enable kprobes
> +	}
> +	refcount--;
> +	printf("DEBUG: legacy_all refcount=%d\n", refcount);
> +	if (refcount == 0) {
> +		// off ret value back to initial value if last consumer
> +		return write_uint_to_file(file, initial);
> +	}
> +	return 0;
> +}

Same thing here: 2 functions that could be reduced to one with an
argument to KPROB_LEG_TOGGLE or KPROB_LEG_ALL_TOGGLE.

I’m using static initial so I can recover the original status of
the “enable” files after the program is unloaded. Unfortunately
this is not multi-task friendly as another process would
step into this logic but I did not want to leave “enabled”
after we unload if it wasn’t before.

I’m saying this because of your idea of having PID as the kprobe
event names… it would have the same problem… We could, in theory
leave all “enabled” files enabled (1) at the end, use PID in the
kprobe event names and unload only our events… but then I would
leave /sys/kernel/debug/kprobes/enabled enabled even if it was
not.. because we could be concurrent to other tasks using libbpf.

> +static int kprobe_event_normalize(char *newname, size_t size, const char  
> *name, bool retprobe)
> +{
> +	int ret = 0;
> +
> +	if (IS_ERR(name))
> +		return -1;
> +
> +	if (retprobe)
> +		ret = snprintf(newname, size, "kprobes/%s_ret", name);
> +	else
> +		ret = snprintf(newname, size, "kprobes/%s", name);
> +
> +	if (ret <= strlen("kprobes/"))
> +		ret = -errno;
> +
> +	return ret;
> +}
> +
> +static int toggle_single_kprobe_event_legacy(bool on, const char *name,  
> bool retprobe)
> +{
> +	char probename[32], f[96];
> +	const char *file = KPROBE_SINGLE_TOGGLE;
> +	int ret;
> +
> +	ret = kprobe_event_normalize(probename, sizeof(probename), name,  
> retprobe);
> +	if (ret < 0)
> +		return ret;
> +
> +	snprintf(f, sizeof(f), file, probename + strlen("kprobes/"));
> +
> +	printf("DEBUG: writing %u to %s\n", (unsigned int) on, f);
> +
> +	ret = write_uint_to_file(f, (unsigned int) on);
> +
> +	return ret;
> +}
> +
> +static int poke_kprobe_events(bool add, const char *name, bool retprobe)
> +{
> +	int fd, ret = 0;
> +	char probename[32], cmd[96];
> +	const char *file = KPROBE_EVENTS;
> +
> +	ret = kprobe_event_normalize(probename, sizeof(probename), name,  
> retprobe);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (add)
> +		snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p',  
> probename, name);
> +	else
> +		snprintf(cmd, sizeof(cmd), "-:%s", probename);
> +
> +	printf("DEBUG: %s\n", cmd);
> +
> +	fd = open(file, O_WRONLY|O_APPEND, 0);
> +	if (!fd)
> +		return -errno;
> +	ret = write(fd, cmd, strlen(cmd));
> +	if (ret < 0)
> +		ret = -errno;
> +	close(fd);
> +
> +	return ret;
> +}
> +
> +static inline int add_kprobe_event_legacy(const char* func_name, bool  
> retprobe)
> +{
> +	int ret = 0;
> +
> +	ret = poke_kprobe_events(true, func_name, retprobe);
> +	if (ret < 0)
> +		printf("DEBUG: poke_kprobe_events (on) error\n");
> +
> +	ret = toggle_kprobe_event_legacy_all(true);
> +	if (ret < 0)
> +		printf("DEBUG: toggle_kprobe_event_legacy_all (on) error\n");
> +
> +	ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
> +	if (ret < 0)
> +		printf("DEBUG: toggle_single_kprobe_event_legacy (on) error\n");
> +
> +	return ret;
> +}
> +
> +static inline int remove_kprobe_event_legacy(const char* func_name, bool  
> retprobe)
> +{
> +	int ret = 0;
> +
> +	ret = toggle_kprobe_event_legacy_all(true);
> +	if (ret < 0)
> +		printf("DEBUG: toggle_kprobe_event_legacy_all (off) error\n");
> +
> +	ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
> +	if (ret < 0)
> +		printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
> +
> +	ret = toggle_single_kprobe_event_legacy(false, func_name, retprobe);
> +	if (ret < 0)
> +		printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
> +
> +	ret = poke_kprobe_events(false, func_name, retprobe);
> +	if (ret < 0)
> +		printf("DEBUG: poke_kprobe_events (off) error\n");
> +
> +	return ret;
> +}

I’m doing a “make sure what has to be enabled to be enabled” approach here.
Please ignore all the DEBUGs, etc, I’ll deal with errors after its good.

> +
> +static int determine_kprobe_perf_type_legacy(const char *func_name)
> +{
> +	char file[96];
> +	const char *fname = KPROBE_EVENT_ID;
> +
> +	snprintf(file, sizeof(file), fname, func_name);
> +
> +	return parse_uint_from_file(file, "%d\n");
> +}
> +
>  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>  				 uint64_t offset, int pid)
>  {
> @@ -9760,6 +10034,51 @@ static int perf_event_open_probe(bool uprobe,  
> bool retprobe, const char *name,
>  	return pfd;
>  }
>
> +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe,  
> const char *name,
> +					uint64_t offset, int pid)
> +{
> +	struct perf_event_attr attr = {};
> +	char errmsg[STRERR_BUFSIZE];
> +	int type, pfd, err;
> +
> +	if (uprobe) // legacy uprobe not supported yet
> +		return -1;

Would that be ok for now ? Until we are sure kprobe legacy interface is  
good ?

> +
> +	err = toggle_kprobe_legacy(true);
> +	if (err < 0) {
> +		pr_warn("failed to toggle kprobe legacy support: %s\n",  
> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return err;
> +	}
> +	err = add_kprobe_event_legacy(name, retprobe);
> +	if (err < 0) {
> +		pr_warn("failed to add legacy kprobe event: %s\n",  
> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return err;
> +	}
> +	type = determine_kprobe_perf_type_legacy(name);
> +	if (err < 0) {
> +		pr_warn("failed to determine legacy kprobe event id: %s\n",  
> libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> +		return type;
> +	}
> +
> +	attr.size = sizeof(attr);
> +	attr.config = type;
> +	attr.type = PERF_TYPE_TRACEPOINT;
> +
> +	pfd = syscall(__NR_perf_event_open,
> +		      &attr,
> +		      pid < 0 ? -1 : pid,
> +		      pid == -1 ? 0 : -1,
> +		      -1,
> +		      PERF_FLAG_FD_CLOEXEC);
> +
> +	if (pfd < 0) {
> +		err = -errno;
> +		pr_warn("legacy kprobe perf_event_open() failed: %s\n",  
> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return err;
> +	}
> +	return pfd;
> +}
> +
>  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>  					    bool retprobe,
>  					    const char *func_name)
> @@ -9788,6 +10107,33 @@ struct bpf_link  
> *bpf_program__attach_kprobe(struct bpf_program *prog,
>  	return link;
>  }
>
> +struct bpf_link *bpf_program__attach_kprobe_legacy(struct bpf_program  
> *prog,
> +						   bool retprobe,
> +						   const char *func_name)
> +{
> +	char errmsg[STRERR_BUFSIZE];
> +	struct bpf_link *link;
> +	int pfd, err;
> +
> +	pfd = perf_event_open_probe_legacy(false, retprobe, func_name, 0, -1);
> +	if (pfd < 0) {
> +		pr_warn("prog '%s': failed to create %s '%s' legacy perf  
> event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name,  
> libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> +		return ERR_PTR(pfd);
> +	}
> +	link = bpf_program__attach_perf_event_legacy(prog, pfd);
> +	if (IS_ERR(link)) {
> +		close(pfd);
> +		err = PTR_ERR(link);
> +		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",  
> prog->name, retprobe ? "kretprobe" : "kprobe", func_name,  
> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +		return link;
> +	}
> +	/* needed history for the legacy probe cleanup */
> +	link->legacy.name = func_name;
> +	link->legacy.retprobe = retprobe;

Note I’m not setting those variables inside
bpf_program__atach_perf_event_legacy(). They’re not available
there and I did not want to make them to be (through arguments).

> +
> +	return link;
> +}
> +
>  static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
>  				      struct bpf_program *prog)
>  {
> @@ -9797,6 +10143,9 @@ static struct bpf_link *attach_kprobe(const struct  
> bpf_sec_def *sec,
>  	func_name = prog->sec_name + sec->len;
>  	retprobe = strcmp(sec->sec, "kretprobe/") == 0;
>
> +	if(determine_kprobe_legacy())
> +		return bpf_program__attach_kprobe_legacy(prog, retprobe, func_name);
> +
>  	return bpf_program__attach_kprobe(prog, retprobe, func_name);
>  }

I’m assuming this is okay based on your saying of detecting a feature
instead of using the if(x) if(y) approach.

>
> @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct  
> bpf_object_skeleton *s)
>  	free(s->maps);
>  	free(s->progs);
>  	free(s);
> +
> +	remove_kprobe_event_legacy("ip_set_create", false);
> +	remove_kprobe_event_legacy("ip_set_create", true);

This is the main issue I wanted to show you before continuing.
I cannot remove the kprobe event unless the obj is unloaded.
That is why I have this hard coded here, just because I was
testing. Any thoughts how to cleanup the kprobes without
jeopardising the API too much ?

>  }
> —
> 2.17.1



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-03-22 18:25     ` Rafael David Tinoco
@ 2021-03-26 20:50       ` Andrii Nakryiko
  2021-04-07  4:49         ` Rafael David Tinoco
  0 siblings, 1 reply; 24+ messages in thread
From: Andrii Nakryiko @ 2021-03-26 20:50 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Mon, Mar 22, 2021 at 11:25 AM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
> > - This is a RFC (v2).
> > - Please check my reply with inline comments.
>
> Comments bellow… (no correct formatting for now):
>
> > ---
> >  src/libbpf.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 357 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/libbpf.c b/src/libbpf.c
> > index 3b1c79f..e9c6025 100644
> > --- a/src/libbpf.c
> > +++ b/src/libbpf.c
> > @@ -9465,6 +9465,10 @@ struct bpf_link {
> >       char *pin_path;         /* NULL, if not pinned */
> >       int fd;                 /* hook FD, -1 if not applicable */
> >       bool disconnected;
> > +     struct {
> > +             const char *name;
> > +             bool retprobe;
> > +     } legacy;
> >  };
>
> For bpf_link->detach() I needed func_name somewhere.

Right, though it's not func_name that you need, but "event_name".
Let's add link ([0]) to poke_kprobe_events somewhere, and probably
event have example full syntax of all the commands:

 p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
 r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]  : Set a return probe
 p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]       : Set a return probe
 -:[GRP/]EVENT                                         : Clear a probe

  [0] https://www.kernel.org/doc/html/latest/trace/kprobetrace.html


Now, you should not extend bpf_link itself. Create bpf_link_kprobe,
that will have those two extra fields. Put struct bpf_link as a first
field of bpf_link_kprobe. We used to have bpf_link_fd, you can try to
find it in Git history to see how it was done.

And another problem -- you should allocate memory for this event_name,
not rely on the user to keep that memory for you.


>
> >
> > +static inline int remove_kprobe_event_legacy(const char*, bool);
> > +
> >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> >  {
> >       int err;
> > @@ -9605,8 +9612,25 @@ static int bpf_link__detach_perf_event(struct
> > bpf_link *link)
> >       err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
> >       if (err)
> >               err = -errno;
> > -
> >       close(link->fd);
> > +
> > +     return err;
> > +}
> > +
> > +static int bpf_link__detach_perf_event_legacy(struct bpf_link *link)
> > +{
> > +     int err;
> > +
> > +     err = bpf_link__detach_perf_event(link);
> > +     if (err)
> > +             err = -errno; // improve this
> > +
> > +     /*
> > +     err = remove_kprobe_event_legacy(link->legacy.name,
> > link->legacy.retprobe);
> > +     if (err)
> > +             err = -errno;
> > +      */
> > +
> >       return err;
> >  }
>
> Unfortunately I can’t remove kprobe event name from kprobe_events,
> even if I unload it (0 >> enabled) before. It won’t work until the
> object is fully unloaded. This is why previous version using
> bpf_program__set_priv() used to work. I’m showing this bellow…
>
> Check the last lines of this to understand better.
>
> >
> > @@ -9655,6 +9679,48 @@ struct bpf_link
> > *bpf_program__attach_perf_event(struct bpf_program *prog,
> >       return link;
> >  }
> >
> > +struct bpf_link *bpf_program__attach_perf_event_legacy(struct
> > bpf_program *prog,
> > +                                                    int pfd)
> > +{
> > +     char errmsg[STRERR_BUFSIZE];
> > +     struct bpf_link *link;
> > +     int prog_fd, err;
> > +
> > +     if (pfd < 0) {
> > +             pr_warn("prog '%s': invalid perf event FD %d\n", prog->name, pfd);
> > +             return ERR_PTR(-EINVAL);
> > +     }
> > +     prog_fd = bpf_program__fd(prog);
> > +     if (prog_fd < 0) {
> > +             pr_warn("prog '%s': can't attach BPF program w/o FD (did
> > you load it?)\n", prog->name);
> > +             return ERR_PTR(-EINVAL);
> > +     }
> > +
> > +     link = calloc(1, sizeof(*link));
> > +     if (!link)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     link->detach = &bpf_link__detach_perf_event_legacy;
>
> I created another function for all existing ones using _legacy at the end.
> This one in particular could have a callback function as argument that would
> be passed to link->detach().. this way I could avoid having 2 functions
> alike.
>
> > +     link->fd = pfd;
> > +
> > +     if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
> > +             err = -errno;
> > +             free(link);
> > +             pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
> > prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             if (err == -EPROTO)
> > +                     pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN
> > to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name,
> > pfd);
> > +             return ERR_PTR(err);
> > +     }
> > +     if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
> > +             err = -errno;
> > +             free(link);
> > +             pr_warn("prog '%s': failed to enable pfd %d: %s\n",
> > prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return ERR_PTR(err);
> > +     }
> > +
> > +     return link;
> > +}
> > +
> >  /*
> >   * this function is expected to parse integer in the range of [0, 2^31-1] from
> >   * given file using scanf format string fmt. If actual parsed value is
> > @@ -9685,34 +9751,242 @@ static int parse_uint_from_file(const char
> > *file, const char *fmt)
> >       return ret;
> >  }
> >
> > +static int write_uint_to_file(const char *file, unsigned int val)
> > +{
> > +     char buf[STRERR_BUFSIZE];
> > +     int err;
> > +     FILE *f;
> > +
> > +     f = fopen(file, "w");
> > +     if (!f) {
> > +             err = -errno;
> > +             pr_debug("failed to open '%s': %s\n", file,
> > +                      libbpf_strerror_r(err, buf, sizeof(buf)));
> > +             return err;
> > +     }
> > +     err = fprintf(f, "%u", val);
> > +     if (err != 1) {
> > +             err = -errno;
> > +             pr_debug("failed to write '%u' to '%s': %s\n", val, file,
> > +                     libbpf_strerror_r(err, buf, sizeof(buf)));
> > +             fclose(f);
> > +             return err;
> > +     }
> > +     fclose(f);
> > +     return 0;
> > +}
> > +
> > +#define KPROBE_PERF_TYPE     "/sys/bus/event_source/devices/kprobe/type"
> > +#define UPROBE_PERF_TYPE     "/sys/bus/event_source/devices/uprobe/type"
> > +#define KPROBERET_FORMAT
> > "/sys/bus/event_source/devices/kprobe/format/retprobe"
> > +#define UPROBERET_FORMAT
> > "/sys/bus/event_source/devices/uprobe/format/retprobe"
> > +/* legacy kprobe events related files */
> > +#define KPROBE_EVENTS                "/sys/kernel/debug/tracing/kprobe_events"
> > +#define KPROBE_LEG_TOGGLE    "/sys/kernel/debug/kprobes/enabled"

Not LEG, please, LEGACY

> > +#define KPROBE_LEG_ALL_TOGGLE
> > "/sys/kernel/debug/tracing/events/kprobes/enable";
> > +#define KPROBE_SINGLE_TOGGLE
> > "/sys/kernel/debug/tracing/events/kprobes/%s/enable";
> > +#define KPROBE_EVENT_ID      "/sys/kernel/debug/tracing/events/kprobes/%s/id";
> > +
>
> This made the life easier: to understand which files were related to what

Ok, sure, just not legs, please :)

>
> > +static bool determine_kprobe_legacy(void)
> > +{
> > +     struct stat s;
> > +
> > +     return stat(KPROBE_PERF_TYPE, &s) == 0 ? false : true;

there is access(file, F_OK) which is nicer to use for checking file existence

> > +}
> > +
> >  static int determine_kprobe_perf_type(void)
> >  {
> > -     const char *file = "/sys/bus/event_source/devices/kprobe/type";
> > +     const char *file = KPROBE_PERF_TYPE;

just inline then, what's the point of this variable?

> >
> >       return parse_uint_from_file(file, "%d\n");
> >  }
> >
> >  static int determine_uprobe_perf_type(void)
> >  {
> > -     const char *file = "/sys/bus/event_source/devices/uprobe/type";
> > +     const char *file = UPROBE_PERF_TYPE;
> >
> >       return parse_uint_from_file(file, "%d\n");
> >  }
> >
> >  static int determine_kprobe_retprobe_bit(void)
> >  {
> > -     const char *file =
> > "/sys/bus/event_source/devices/kprobe/format/retprobe";
> > +     const char *file = KPROBERET_FORMAT;
> >
> >       return parse_uint_from_file(file, "config:%d\n");
> >  }
> >
> >  static int determine_uprobe_retprobe_bit(void)
> >  {
> > -     const char *file =
> > "/sys/bus/event_source/devices/uprobe/format/retprobe";
> > +     const char *file = UPROBERET_FORMAT;
> >
> >       return parse_uint_from_file(file, "config:%d\n");
> >  }
> >
> > +static int toggle_kprobe_legacy(bool on)
> > +{
> > +     static int refcount;
> > +     static bool initial, veryfirst;
> > +     const char *file = KPROBE_LEG_TOGGLE;
> > +
> > +     if (on) {
> > +             refcount++;
> > +             if (veryfirst)
> > +                     return 0;
> > +             veryfirst = true;
> > +             /* initial value for KPROB_LEG_TOGGLE */
> > +             initial = (bool) parse_uint_from_file(file, "%d\n");
> > +             return write_uint_to_file(file, 1); /* enable kprobes */
> > +     }
> > +     refcount--;
> > +     printf("DEBUG: kprobe_legacy refcount=%d\n", refcount);
> > +     if (refcount == 0) {
> > +             /* off ret value back to initial value if last consumer */
> > +             return write_uint_to_file(file, initial);
> > +     }
> > +     return 0;
> > +}
> > +
> > +static int toggle_kprobe_event_legacy_all(bool on)
> > +{
> > +     static int refcount;
> > +     static bool initial, veryfirst;
> > +     const char *file = KPROBE_LEG_ALL_TOGGLE;
> > +
> > +     if (on) {
> > +             refcount++;
> > +             if (veryfirst)
> > +                     return 0;
> > +             veryfirst = true;
> > +             // initial value for KPROB_LEG_ALL_TOGGLE
> > +             initial = (bool) parse_uint_from_file(file, "%d\n");
> > +             return write_uint_to_file(file, 1); // enable kprobes
> > +     }
> > +     refcount--;
> > +     printf("DEBUG: legacy_all refcount=%d\n", refcount);
> > +     if (refcount == 0) {
> > +             // off ret value back to initial value if last consumer
> > +             return write_uint_to_file(file, initial);
> > +     }
> > +     return 0;
> > +}
>
> Same thing here: 2 functions that could be reduced to one with an
> argument to KPROB_LEG_TOGGLE or KPROB_LEG_ALL_TOGGLE.
>
> I’m using static initial so I can recover the original status of
> the “enable” files after the program is unloaded. Unfortunately
> this is not multi-task friendly as another process would
> step into this logic but I did not want to leave “enabled”
> after we unload if it wasn’t before.
>
> I’m saying this because of your idea of having PID as the kprobe
> event names… it would have the same problem… We could, in theory
> leave all “enabled” files enabled (1) at the end, use PID in the
> kprobe event names and unload only our events… but then I would
> leave /sys/kernel/debug/kprobes/enabled enabled even if it was
> not.. because we could be concurrent to other tasks using libbpf.

So I don't get at all why you have these toggles, especially
ALL_TOGGLE? You shouldn't try to determine the state of another probe.
You always know whether you want to enable or disable your specific
toggle. I'm very confused by all this.

>
> > +static int kprobe_event_normalize(char *newname, size_t size, const char
> > *name, bool retprobe)
> > +{
> > +     int ret = 0;
> > +
> > +     if (IS_ERR(name))
> > +             return -1;
> > +
> > +     if (retprobe)
> > +             ret = snprintf(newname, size, "kprobes/%s_ret", name);
> > +     else
> > +             ret = snprintf(newname, size, "kprobes/%s", name);
> > +
> > +     if (ret <= strlen("kprobes/"))
> > +             ret = -errno;
> > +
> > +     return ret;
> > +}
> > +
> > +static int toggle_single_kprobe_event_legacy(bool on, const char *name,
> > bool retprobe)

don't get why you need this function either...

> > +{
> > +     char probename[32], f[96];
> > +     const char *file = KPROBE_SINGLE_TOGGLE;
> > +     int ret;
> > +
> > +     ret = kprobe_event_normalize(probename, sizeof(probename), name,
> > retprobe);
> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     snprintf(f, sizeof(f), file, probename + strlen("kprobes/"));
> > +
> > +     printf("DEBUG: writing %u to %s\n", (unsigned int) on, f);
> > +
> > +     ret = write_uint_to_file(f, (unsigned int) on);
> > +
> > +     return ret;
> > +}
> > +
> > +static int poke_kprobe_events(bool add, const char *name, bool retprobe)
> > +{
> > +     int fd, ret = 0;
> > +     char probename[32], cmd[96];
> > +     const char *file = KPROBE_EVENTS;
> > +
> > +     ret = kprobe_event_normalize(probename, sizeof(probename), name,
> > retprobe);

just have that if/else + snprintf right here, no need to jump through hoops

> > +     if (ret < 0)
> > +             return ret;
> > +
> > +     if (add)
> > +             snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p',
> > probename, name);
> > +     else
> > +             snprintf(cmd, sizeof(cmd), "-:%s", probename);
> > +
> > +     printf("DEBUG: %s\n", cmd);
> > +
> > +     fd = open(file, O_WRONLY|O_APPEND, 0);
> > +     if (!fd)
> > +             return -errno;
> > +     ret = write(fd, cmd, strlen(cmd));
> > +     if (ret < 0)
> > +             ret = -errno;
> > +     close(fd);
> > +
> > +     return ret;
> > +}
> > +
> > +static inline int add_kprobe_event_legacy(const char* func_name, bool
> > retprobe)
> > +{
> > +     int ret = 0;
> > +
> > +     ret = poke_kprobe_events(true, func_name, retprobe);
> > +     if (ret < 0)
> > +             printf("DEBUG: poke_kprobe_events (on) error\n");
> > +
> > +     ret = toggle_kprobe_event_legacy_all(true);

why?... why do you need to touch the state of other probes. This will
never work reliable but also should not be required

> > +     if (ret < 0)
> > +             printf("DEBUG: toggle_kprobe_event_legacy_all (on) error\n");
> > +
> > +     ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
> > +     if (ret < 0)
> > +             printf("DEBUG: toggle_single_kprobe_event_legacy (on) error\n");
> > +
> > +     return ret;
> > +}
> > +
> > +static inline int remove_kprobe_event_legacy(const char* func_name, bool
> > retprobe)
> > +{
> > +     int ret = 0;
> > +
> > +     ret = toggle_kprobe_event_legacy_all(true);
> > +     if (ret < 0)
> > +             printf("DEBUG: toggle_kprobe_event_legacy_all (off) error\n");
> > +
> > +     ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
> > +     if (ret < 0)
> > +             printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
> > +
> > +     ret = toggle_single_kprobe_event_legacy(false, func_name, retprobe);
> > +     if (ret < 0)
> > +             printf("DEBUG: toggle_single_kprobe_event_legacy (off) error\n");
> > +
> > +     ret = poke_kprobe_events(false, func_name, retprobe);
> > +     if (ret < 0)
> > +             printf("DEBUG: poke_kprobe_events (off) error\n");
> > +
> > +     return ret;
> > +}
>
> I’m doing a “make sure what has to be enabled to be enabled” approach here.
> Please ignore all the DEBUGs, etc, I’ll deal with errors after its good.

again, you haven't explained why. Don't touch probes you haven't created.

>
> > +
> > +static int determine_kprobe_perf_type_legacy(const char *func_name)
> > +{
> > +     char file[96];
> > +     const char *fname = KPROBE_EVENT_ID;

again, what's the point of this variable, just inline

and this is a problem with those #defines. I need to now jump back and
forth to see what KPROBE_EVENT_ID is. So unless we have to use them in
multiple places, I'd keep those constants where they were, honestly.

> > +
> > +     snprintf(file, sizeof(file), fname, func_name);
> > +
> > +     return parse_uint_from_file(file, "%d\n");
> > +}
> > +
> >  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
> >                                uint64_t offset, int pid)
> >  {
> > @@ -9760,6 +10034,51 @@ static int perf_event_open_probe(bool uprobe,
> > bool retprobe, const char *name,
> >       return pfd;
> >  }
> >
> > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe,
> > const char *name,
> > +                                     uint64_t offset, int pid)
> > +{
> > +     struct perf_event_attr attr = {};
> > +     char errmsg[STRERR_BUFSIZE];
> > +     int type, pfd, err;
> > +
> > +     if (uprobe) // legacy uprobe not supported yet
> > +             return -1;
>
> Would that be ok for now ? Until we are sure kprobe legacy interface is
> good ?
>

it's ok, but return -EOPNOTSUPP instead


> > +
> > +     err = toggle_kprobe_legacy(true);
> > +     if (err < 0) {
> > +             pr_warn("failed to toggle kprobe legacy support: %s\n",
> > libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return err;
> > +     }
> > +     err = add_kprobe_event_legacy(name, retprobe);
> > +     if (err < 0) {
> > +             pr_warn("failed to add legacy kprobe event: %s\n",
> > libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return err;
> > +     }
> > +     type = determine_kprobe_perf_type_legacy(name);
> > +     if (err < 0) {
> > +             pr_warn("failed to determine legacy kprobe event id: %s\n",
> > libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
> > +             return type;
> > +     }
> > +
> > +     attr.size = sizeof(attr);
> > +     attr.config = type;
> > +     attr.type = PERF_TYPE_TRACEPOINT;
> > +
> > +     pfd = syscall(__NR_perf_event_open,
> > +                   &attr,
> > +                   pid < 0 ? -1 : pid,
> > +                   pid == -1 ? 0 : -1,
> > +                   -1,
> > +                   PERF_FLAG_FD_CLOEXEC);

btw, a question. Is there similar legacy interface to tracepoints? It
would be good to support those as well. Doesn't have to happen at the
same time, but let's just keep it in mind as we implement this.

> > +
> > +     if (pfd < 0) {
> > +             err = -errno;
> > +             pr_warn("legacy kprobe perf_event_open() failed: %s\n",
> > libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return err;
> > +     }
> > +     return pfd;
> > +}
> > +
> >  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
> >                                           bool retprobe,
> >                                           const char *func_name)
> > @@ -9788,6 +10107,33 @@ struct bpf_link
> > *bpf_program__attach_kprobe(struct bpf_program *prog,
> >       return link;
> >  }
> >
> > +struct bpf_link *bpf_program__attach_kprobe_legacy(struct bpf_program

this is wrong from the API perspective. The goal is to not make users
decide whether they want legacy or non-legacy interfaces. With all
your work there shouldn't be any new APIs.
bpf_program__attach_kprobe() should detect which interface to use and
just use it.

> > *prog,
> > +                                                bool retprobe,
> > +                                                const char *func_name)
> > +{
> > +     char errmsg[STRERR_BUFSIZE];
> > +     struct bpf_link *link;
> > +     int pfd, err;
> > +
> > +     pfd = perf_event_open_probe_legacy(false, retprobe, func_name, 0, -1);
> > +     if (pfd < 0) {
> > +             pr_warn("prog '%s': failed to create %s '%s' legacy perf
> > event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
> > libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
> > +             return ERR_PTR(pfd);
> > +     }
> > +     link = bpf_program__attach_perf_event_legacy(prog, pfd);
> > +     if (IS_ERR(link)) {
> > +             close(pfd);
> > +             err = PTR_ERR(link);
> > +             pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
> > prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
> > libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> > +             return link;
> > +     }
> > +     /* needed history for the legacy probe cleanup */
> > +     link->legacy.name = func_name;
> > +     link->legacy.retprobe = retprobe;
>
> Note I’m not setting those variables inside
> bpf_program__atach_perf_event_legacy(). They’re not available
> there and I did not want to make them to be (through arguments).

as I said above, you shouldn't assume that func_name will still be
allocated by the time you get to detaching kprobe. You should strdup()
or do whatever is necessary to own necessary memory.

>
> > +
> > +     return link;
> > +}
> > +
> >  static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
> >                                     struct bpf_program *prog)
> >  {
> > @@ -9797,6 +10143,9 @@ static struct bpf_link *attach_kprobe(const struct
> > bpf_sec_def *sec,
> >       func_name = prog->sec_name + sec->len;
> >       retprobe = strcmp(sec->sec, "kretprobe/") == 0;
> >
> > +     if(determine_kprobe_legacy())
> > +             return bpf_program__attach_kprobe_legacy(prog, retprobe, func_name);
> > +

the other way around, attach_kprobe should just delegate to
bpf_program__attach_kprobe, but bpf_program__attach_kprobe should be
smart enough

> >       return bpf_program__attach_kprobe(prog, retprobe, func_name);
> >  }
>
> I’m assuming this is okay based on your saying of detecting a feature
> instead of using the if(x) if(y) approach.
>
> >
> > @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct
> > bpf_object_skeleton *s)
> >       free(s->maps);
> >       free(s->progs);(),
> >       free(s);
> > +
> > +     remove_kprobe_event_legacy("ip_set_create", false);
> > +     remove_kprobe_event_legacy("ip_set_create", true);
>
> This is the main issue I wanted to show you before continuing.
> I cannot remove the kprobe event unless the obj is unloaded.
> That is why I have this hard coded here, just because I was
> testing. Any thoughts how to cleanup the kprobes without
> jeopardising the API too much ?


cannot as in it doesn't work for whatever reason? Or what do you mean?

I see that you had bpf_link__detach_perf_event_legacy calling
remove_kprobe_event_legacy, what didn't work?

You somehow ended up with 3 times more code and I have more questions
now then before. When you say "it doesn't work", please make sure to
explain what exactly doesn't work, what you did, what you expected to
happen/see.

>
> >  }
> > —
> > 2.17.1
>
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-03-26 20:50       ` Andrii Nakryiko
@ 2021-04-07  4:49         ` Rafael David Tinoco
  2021-04-07 22:33           ` Andrii Nakryiko
  0 siblings, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-04-07  4:49 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: LKML BPF

Sorry taking so long for replying on this… have been working in:
https://github.com/rafaeldtinoco/conntracker/tree/main/ebpf
as a consumer for the work being proposed by this patch.

Current working version at:
https://github.com/rafaeldtinoco/conntracker/blob/main/ebpf/patches/libbpf-introduce-legacy-kprobe-events-support.patch
About to be changed with suggestions from this thread.

>>> --- a/src/libbpf.c
>>> +++ b/src/libbpf.c
>>> @@ -9465,6 +9465,10 @@ struct bpf_link {
>>>       char *pin_path;         /* NULL, if not pinned */
>>>       int fd;                 /* hook FD, -1 if not applicable */
>>>       bool disconnected;
>>> +     struct {
>>> +             const char *name;
>>> +             bool retprobe;
>>> +     } legacy;
>>>  };
>>
>> For bpf_link->detach() I needed func_name somewhere.
>
> Right, though it's not func_name that you need, but "event_name".

Yep.

> Let's add link ([0]) to poke_kprobe_events somewhere, and probably
> event have example full syntax of all the commands:
>
>  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
>  r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]  : Set a return probe
>  p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]       : Set a return probe
>  -:[GRP/]EVENT                                         : Clear a probe
>
>   [0] https://www.kernel.org/doc/html/latest/trace/kprobetrace.html

Add [0] as a comment you say (as a reference) ? Or you mean to alter
the way I’m writing to kprobe_events file in a more complete way ?

> Now, you should not extend bpf_link itself. Create bpf_link_kprobe,
> that will have those two extra fields. Put struct bpf_link as a first
> field of bpf_link_kprobe. We used to have bpf_link_fd, you can try to
> find it in Git history to see how it was done.

Will do.

> And another problem -- you should allocate memory for this event_name,
> not rely on the user to keep that memory for you.

Definitely.

>>> +
>>> +#define KPROBE_PERF_TYPE     "/sys/bus/event_source/devices/kprobe/type"
>>> +#define UPROBE_PERF_TYPE     "/sys/bus/event_source/devices/uprobe/type"
>>> +#define KPROBERET_FORMAT
>>> "/sys/bus/event_source/devices/kprobe/format/retprobe"
>>> +#define UPROBERET_FORMAT
>>> "/sys/bus/event_source/devices/uprobe/format/retprobe"
>>> +/* legacy kprobe events related files */
>>> +#define KPROBE_EVENTS                 
>>> "/sys/kernel/debug/tracing/kprobe_events"
>>> +#define KPROBE_LEG_TOGGLE    "/sys/kernel/debug/kprobes/enabled"
>
> Not LEG, please, LEGACY

I’m removing all those like you said, not much advantage in going back
and forth because of those definitions.

>
>>> +#define KPROBE_LEG_ALL_TOGGLE
>>> "/sys/kernel/debug/tracing/events/kprobes/enable";
>>> +#define KPROBE_SINGLE_TOGGLE
>>> "/sys/kernel/debug/tracing/events/kprobes/%s/enable";
>>> +#define KPROBE_EVENT_ID       
>>> "/sys/kernel/debug/tracing/events/kprobes/%s/id";
>>> +
>>
>> This made the life easier: to understand which files were related to what
>
> Ok, sure, just not legs, please :)
>
>>> +static bool determine_kprobe_legacy(void)
>>> +{
>>> +     struct stat s;
>>> +
>>> +     return stat(KPROBE_PERF_TYPE, &s) == 0 ? false : true;
>
> there is access(file, F_OK) which is nicer to use for checking file  
> existence

Sure.

>>> +static int toggle_kprobe_legacy(bool on)
>>> +{
>>> +     static int refcount;
>>> +     static bool initial, veryfirst;
>>> +     const char *file = KPROBE_LEG_TOGGLE;
>>> +
>>> +     if (on) {
>>> +             refcount++;
>>> +             if (veryfirst)
>>> +                     return 0;
>>> +             veryfirst = true;
>>> +             /* initial value for KPROB_LEG_TOGGLE */
>>> +             initial = (bool) parse_uint_from_file(file, "%d\n");
>>> +             return write_uint_to_file(file, 1); /* enable kprobes */
>>> +     }
>>> +     refcount--;
>>> +     printf("DEBUG: kprobe_legacy refcount=%d\n", refcount);
>>> +     if (refcount == 0) {
>>> +             /* off ret value back to initial value if last consumer */
>>> +             return write_uint_to_file(file, initial);
>>> +     }
>>> +     return 0;
>>> +}
>>> +
>>> +static int toggle_kprobe_event_legacy_all(bool on)
>>> +{
>>> +     static int refcount;
>>> +     static bool initial, veryfirst;
>>> +     const char *file = KPROBE_LEG_ALL_TOGGLE;
>>> +
>>> +     if (on) {
>>> +             refcount++;
>>> +             if (veryfirst)
>>> +                     return 0;
>>> +             veryfirst = true;
>>> +             // initial value for KPROB_LEG_ALL_TOGGLE
>>> +             initial = (bool) parse_uint_from_file(file, "%d\n");
>>> +             return write_uint_to_file(file, 1); // enable kprobes
>>> +     }
>>> +     refcount--;
>>> +     printf("DEBUG: legacy_all refcount=%d\n", refcount);
>>> +     if (refcount == 0) {
>>> +             // off ret value back to initial value if last consumer
>>> +             return write_uint_to_file(file, initial);
>>> +     }
>>> +     return 0;
>>> +}
>>
>> Same thing here: 2 functions that could be reduced to one with an
>> argument to KPROB_LEG_TOGGLE or KPROB_LEG_ALL_TOGGLE.
>>
>> I’m using static initial so I can recover the original status of
>> the “enable” files after the program is unloaded. Unfortunately
>> this is not multi-task friendly as another process would
>> step into this logic but I did not want to leave “enabled”
>> after we unload if it wasn’t before.
>>
>> I’m saying this because of your idea of having PID as the kprobe
>> event names… it would have the same problem… We could, in theory
>> leave all “enabled” files enabled (1) at the end, use PID in the
>> kprobe event names and unload only our events… but then I would
>> leave /sys/kernel/debug/kprobes/enabled enabled even if it was
>> not.. because we could be concurrent to other tasks using libbpf.
>
> So I don't get at all why you have these toggles, especially
> ALL_TOGGLE? You shouldn't try to determine the state of another probe.
> You always know whether you want to enable or disable your specific
> toggle. I'm very confused by all this.

Yes, this was a confusing thing indeed and to be honest it proved to
be very buggy when testing with conntracker. What I’ll do (or I’m
doing) is to toggle ON to needed files before the probe is added:

static inline int add_kprobe_event_legacy(const char* func_name, bool  
retprobe)
{
	int ret = 0;

	ret |= poke_kprobe_events(true, func_name, retprobe);
	ret |= toggle_kprobe_event_legacy_all(true);
	ret |= toggle_single_kprobe_event_legacy(true, func_name, retprobe);

	return ret;
}

1) /sys/kernel/debug/tracing/kprobe_events => 1
2) /sys/kernel/debug/tracing/events/kprobes/enable => 1
3) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 1

And toggle off only kprobe_event:

static inline int remove_kprobe_event_legacy(const char* func_name, bool  
retprobe)
{
	int ret = 0;

	ret |= toggle_single_kprobe_event_legacy(false, func_name, retprobe);
	ret |= poke_kprobe_events(false, func_name, retprobe);

	return ret;
}

1) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 0

This is working good for my tests.

>
>>> +static int kprobe_event_normalize(char *newname, size_t size, const char
>>> *name, bool retprobe)
>>> +{
>>> +     int ret = 0;
>>> +
>>> +     if (IS_ERR(name))
>>> +             return -1;
>>> +
>>> +     if (retprobe)
>>> +             ret = snprintf(newname, size, "kprobes/%s_ret", name);
>>> +     else
>>> +             ret = snprintf(newname, size, "kprobes/%s", name);
>>> +
>>> +     if (ret <= strlen("kprobes/"))
>>> +             ret = -errno;
>>> +
>>> +     return ret;
>>> +}
>>> +
>>> +static int toggle_single_kprobe_event_legacy(bool on, const char *name,
>>> bool retprobe)
>
> don't get why you need this function either...

Because of /sys/kernel/debug/tracing/events/kprobes/%s/enable. I’m
toggling it to OFF before removing the kprobe in kprobe_events, like
showed above.

>
>>> +{
>>> +     char probename[32], f[96];
>>> +     const char *file = KPROBE_SINGLE_TOGGLE;
>>> +     int ret;
>>> +
>>> +     ret = kprobe_event_normalize(probename, sizeof(probename), name,
>>> retprobe);
>>> +     if (ret < 0)
>>> +             return ret;
>>> +
>>> +     snprintf(f, sizeof(f), file, probename + strlen("kprobes/"));
>>> +
>>> +     printf("DEBUG: writing %u to %s\n", (unsigned int) on, f);
>>> +
>>> +     ret = write_uint_to_file(f, (unsigned int) on);
>>> +
>>> +     return ret;
>>> +}
>>> +
>>> +static int poke_kprobe_events(bool add, const char *name, bool retprobe)
>>> +{
>>> +     int fd, ret = 0;
>>> +     char probename[32], cmd[96];
>>> +     const char *file = KPROBE_EVENTS;
>>> +
>>> +     ret = kprobe_event_normalize(probename, sizeof(probename), name,
>>> retprobe);
>
> just have that if/else + snprintf right here, no need to jump through hoops
>

Sure.

>>> +     if (ret < 0)
>>> +             return ret;
>>> +
>>> +     if (add)
>>> +             snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p',
>>> probename, name);
>>> +     else
>>> +             snprintf(cmd, sizeof(cmd), "-:%s", probename);
>>> +
>>> +     printf("DEBUG: %s\n", cmd);
>>> +
>>> +     fd = open(file, O_WRONLY|O_APPEND, 0);
>>> +     if (!fd)
>>> +             return -errno;
>>> +     ret = write(fd, cmd, strlen(cmd));
>>> +     if (ret < 0)
>>> +             ret = -errno;
>>> +     close(fd);
>>> +
>>> +     return ret;
>>> +}
>>> +
>>> +static inline int add_kprobe_event_legacy(const char* func_name, bool
>>> retprobe)
>>> +{
>>> +     int ret = 0;
>>> +
>>> +     ret = poke_kprobe_events(true, func_name, retprobe);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: poke_kprobe_events (on) error\n");
>>> +
>>> +     ret = toggle_kprobe_event_legacy_all(true);
>
> why?... why do you need to touch the state of other probes. This will
> never work reliable but also should not be required

Addressed above.

>>> +     if (ret < 0)
>>> +             printf("DEBUG: toggle_kprobe_event_legacy_all (on)  
>>> error\n");
>>> +
>>> +     ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: toggle_single_kprobe_event_legacy (on)  
>>> error\n");
>>> +
>>> +     return ret;
>>> +}
>>> +
>>> +static inline int remove_kprobe_event_legacy(const char* func_name, bool
>>> retprobe)
>>> +{
>>> +     int ret = 0;
>>> +
>>> +     ret = toggle_kprobe_event_legacy_all(true);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: toggle_kprobe_event_legacy_all (off)  
>>> error\n");
>>> +
>>> +     ret = toggle_single_kprobe_event_legacy(true, func_name, retprobe);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: toggle_single_kprobe_event_legacy (off)  
>>> error\n");
>>> +
>>> +     ret = toggle_single_kprobe_event_legacy(false, func_name,  
>>> retprobe);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: toggle_single_kprobe_event_legacy (off)  
>>> error\n");
>>> +
>>> +     ret = poke_kprobe_events(false, func_name, retprobe);
>>> +     if (ret < 0)
>>> +             printf("DEBUG: poke_kprobe_events (off) error\n");
>>> +
>>> +     return ret;
>>> +}
>>
>> I’m doing a “make sure what has to be enabled to be enabled” approach  
>> here.
>> Please ignore all the DEBUGs, etc, I’ll deal with errors after its good.
>
> again, you haven't explained why. Don't touch probes you haven't created.

Addressed above.

>
>>> +
>>> +static int determine_kprobe_perf_type_legacy(const char *func_name)
>>> +{
>>> +     char file[96];
>>> +     const char *fname = KPROBE_EVENT_ID;
>
> again, what's the point of this variable, just inline
>
> and this is a problem with those #defines. I need to now jump back and
> forth to see what KPROBE_EVENT_ID is. So unless we have to use them in
> multiple places, I'd keep those constants where they were, honestly.

Addressed above.
>

>>> +
>>> +     snprintf(file, sizeof(file), fname, func_name);
>>> +
>>> +     return parse_uint_from_file(file, "%d\n");
>>> +}
>>> +
>>>  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>>>                                uint64_t offset, int pid)
>>>  {
>>> @@ -9760,6 +10034,51 @@ static int perf_event_open_probe(bool uprobe,
>>> bool retprobe, const char *name,
>>>       return pfd;
>>>  }
>>>
>>> +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe,
>>> const char *name,
>>> +                                     uint64_t offset, int pid)
>>> +{
>>> +     struct perf_event_attr attr = {};
>>> +     char errmsg[STRERR_BUFSIZE];
>>> +     int type, pfd, err;
>>> +
>>> +     if (uprobe) // legacy uprobe not supported yet
>>> +             return -1;
>>
>> Would that be ok for now ? Until we are sure kprobe legacy interface is
>> good ?
>
> it's ok, but return -EOPNOTSUPP instead

Cool.

>>> +
>>> +     err = toggle_kprobe_legacy(true);
>>> +     if (err < 0) {
>>> +             pr_warn("failed to toggle kprobe legacy support: %s\n",
>>> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>>> +             return err;
>>> +     }
>>> +     err = add_kprobe_event_legacy(name, retprobe);
>>> +     if (err < 0) {
>>> +             pr_warn("failed to add legacy kprobe event: %s\n",
>>> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>>> +             return err;
>>> +     }
>>> +     type = determine_kprobe_perf_type_legacy(name);
>>> +     if (err < 0) {
>>> +             pr_warn("failed to determine legacy kprobe event id: %s\n",
>>> libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
>>> +             return type;
>>> +     }
>>> +
>>> +     attr.size = sizeof(attr);
>>> +     attr.config = type;
>>> +     attr.type = PERF_TYPE_TRACEPOINT;
>>> +
>>> +     pfd = syscall(__NR_perf_event_open,
>>> +                   &attr,
>>> +                   pid < 0 ? -1 : pid,
>>> +                   pid == -1 ? 0 : -1,
>>> +                   -1,
>>> +                   PERF_FLAG_FD_CLOEXEC);
>
> btw, a question. Is there similar legacy interface to tracepoints? It
> would be good to support those as well. Doesn't have to happen at the
> same time, but let's just keep it in mind as we implement this.

I *think* the current one is good enough for ~4.15, but I’ll test and
make sure to _at least_ document we need something else if we really
do.

>
>>> +
>>> +     if (pfd < 0) {
>>> +             err = -errno;
>>> +             pr_warn("legacy kprobe perf_event_open() failed: %s\n",
>>> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>>> +             return err;
>>> +     }
>>> +     return pfd;
>>> +}
>>> +
>>>  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>>>                                           bool retprobe,
>>>                                           const char *func_name)
>>> @@ -9788,6 +10107,33 @@ struct bpf_link
>>> *bpf_program__attach_kprobe(struct bpf_program *prog,
>>>       return link;
>>>  }
>>>
>>> +struct bpf_link *bpf_program__attach_kprobe_legacy(struct bpf_program
>
> this is wrong from the API perspective. The goal is to not make users
> decide whether they want legacy or non-legacy interfaces. With all
> your work there shouldn't be any new APIs.
> bpf_program__attach_kprobe() should detect which interface to use and
> just use it.

Yep, I failed to recognise it as an API symbol back when I did this.

>
>>> *prog,
>>> +                                                bool retprobe,
>>> +                                                const char *func_name)
>>> +{
>>> +     char errmsg[STRERR_BUFSIZE];
>>> +     struct bpf_link *link;
>>> +     int pfd, err;
>>> +
>>> +     pfd = perf_event_open_probe_legacy(false, retprobe, func_name, 0,  
>>> -1);
>>> +     if (pfd < 0) {
>>> +             pr_warn("prog '%s': failed to create %s '%s' legacy perf
>>> event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
>>> libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
>>> +             return ERR_PTR(pfd);
>>> +     }
>>> +     link = bpf_program__attach_perf_event_legacy(prog, pfd);
>>> +     if (IS_ERR(link)) {
>>> +             close(pfd);
>>> +             err = PTR_ERR(link);
>>> +             pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
>>> prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
>>> libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>>> +             return link;
>>> +     }
>>> +     /* needed history for the legacy probe cleanup */
>>> +     link->legacy.name = func_name;
>>> +     link->legacy.retprobe = retprobe;
>>
>> Note I’m not setting those variables inside
>> bpf_program__atach_perf_event_legacy(). They’re not available
>> there and I did not want to make them to be (through arguments).
>
> as I said above, you shouldn't assume that func_name will still be
> allocated by the time you get to detaching kprobe. You should strdup()
> or do whatever is necessary to own necessary memory.

+1

>
>>> +
>>> +     return link;
>>> +}
>>> +
>>>  static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
>>>                                     struct bpf_program *prog)
>>>  {
>>> @@ -9797,6 +10143,9 @@ static struct bpf_link */(const struct
>>> bpf_sec_def *sec,
>>>       func_name = prog->sec_name + sec->len;
>>>       retprobe = strcmp(sec->sec, "kretprobe/") == 0;
>>>
>>> +     if(determine_kprobe_legacy())
>>> +             return bpf_program__attach_kprobe_legacy(prog, retprobe,  
>>> func_name);
>>> +
>
> the other way around, attach_kprobe should just delegate to
> bpf_program__attach_kprobe, but bpf_program__attach_kprobe should be
> smart enough

Understood.

>
>>>      return bpf_program__attach_kprobe(prog, retprobe, func_name);
>>>  }
>>
>> I’m assuming this is okay based on your saying of detecting a feature
>> instead of using the if(x) if(y) approach.
>>
>>> @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct
>>> bpf_object_skeleton *s)
>>>       free(s->maps);
>>>       free(s->progs);(),
>>>       free(s);
>>> +
>>> +     remove_kprobe_event_legacy("ip_set_create", false);
>>> +     remove_kprobe_event_legacy("ip_set_create", true);
>>
>> This is the main issue I wanted to show you before continuing.
>> I cannot remove the kprobe event unless the obj is unloaded.
>> That is why I have this hard coded here, just because I was
>> testing. Any thoughts how to cleanup the kprobes without
>> jeopardising the API too much ?
>
> cannot as in it doesn't work for whatever reason? Or what do you mean?
>
> I see that you had bpf_link__detach_perf_event_legacy calling
> remove_kprobe_event_legacy, what didn't work?
>

I’m sorry for not being very clear here. What happens is that, if I
try to remove the kprobe_event_legacy() BEFORE:

if (s->progs)
	bpf_object__detach_skeleton(s);
if (s->obj)
	bpf_object__close(*s->obj);

It fails with generic write error on kprobe_events file. I need to
remove legacy kprobe AFTER object closure. To workaround this on
my project, and to show you this issue, I have come up with:

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
         int i, j;
         struct probeleft {
                 char *probename;
                 bool retprobe;
         } probesleft[24];

         for (i = 0, j = 0; i < s->prog_cnt; i++) {
                 struct bpf_link **link = s->progs[i].link;
                 if ((*link)->legacy.name) {
                         memset(&probesleft[j], 0, sizeof(struct probeleft));
                         probesleft[j].probename = strdup((*link)->legacy.name);
                         probesleft[j].retprobe = (*link)->legacy.retprobe;
                         j++;
                 }
         }

         if (s->progs)
                 bpf_object__detach_skeleton(s);
         if (s->obj)
                 bpf_object__close(*s->obj);
         free(s->maps);
         free(s->progs);
         free(s);

         for (j--; j >= 0; j--) {
                 remove_kprobe_event_legacy(probesleft[j].probename, probesleft[j].retprobe);
                 free(probesleft[j].probename);
         }
}

Which, of course, is not what I’m suggesting to the lib, but shows
the problem and gives you a better idea on how to solve it not
breaking the API.

> You somehow ended up with 3 times more code and I have more questions
> now then before. When you say "it doesn't work", please make sure to
> explain what exactly doesn't work, what you did, what you expected to
> happen/see.

Deal. Thanks a lot for reviewing all this.

-rafaeldtinoco


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-07  4:49         ` Rafael David Tinoco
@ 2021-04-07 22:33           ` Andrii Nakryiko
  2021-04-08 23:59             ` John Fastabend
  2021-04-14 14:30             ` Rafael David Tinoco
  0 siblings, 2 replies; 24+ messages in thread
From: Andrii Nakryiko @ 2021-04-07 22:33 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: LKML BPF

On Tue, Apr 6, 2021 at 9:49 PM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
> Sorry taking so long for replying on this… have been working in:
> https://github.com/rafaeldtinoco/conntracker/tree/main/ebpf
> as a consumer for the work being proposed by this patch.
>
> Current working version at:
> https://github.com/rafaeldtinoco/conntracker/blob/main/ebpf/patches/libbpf-introduce-legacy-kprobe-events-support.patch
> About to be changed with suggestions from this thread.
>
> >>> --- a/src/libbpf.c
> >>> +++ b/src/libbpf.c
> >>> @@ -9465,6 +9465,10 @@ struct bpf_link {
> >>>       char *pin_path;         /* NULL, if not pinned */
> >>>       int fd;                 /* hook FD, -1 if not applicable */
> >>>       bool disconnected;
> >>> +     struct {
> >>> +             const char *name;
> >>> +             bool retprobe;
> >>> +     } legacy;
> >>>  };
> >>
> >> For bpf_link->detach() I needed func_name somewhere.
> >
> > Right, though it's not func_name that you need, but "event_name".
>
> Yep.
>
> > Let's add link ([0]) to poke_kprobe_events somewhere, and probably
> > event have example full syntax of all the commands:
> >
> >  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
> >  r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]  : Set a return probe
> >  p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS]       : Set a return probe
> >  -:[GRP/]EVENT                                         : Clear a probe
> >
> >   [0] https://www.kernel.org/doc/html/latest/trace/kprobetrace.html
>
> Add [0] as a comment you say (as a reference) ? Or you mean to alter
> the way I’m writing to kprobe_events file in a more complete way ?

As a reference.

>
> > Now, you should not extend bpf_link itself. Create bpf_link_kprobe,
> > that will have those two extra fields. Put struct bpf_link as a first
> > field of bpf_link_kprobe. We used to have bpf_link_fd, you can try to
> > find it in Git history to see how it was done.
>
> Will do.
>

[...]

> > So I don't get at all why you have these toggles, especially
> > ALL_TOGGLE? You shouldn't try to determine the state of another probe.
> > You always know whether you want to enable or disable your specific
> > toggle. I'm very confused by all this.
>
> Yes, this was a confusing thing indeed and to be honest it proved to
> be very buggy when testing with conntracker. What I’ll do (or I’m
> doing) is to toggle ON to needed files before the probe is added:
>
> static inline int add_kprobe_event_legacy(const char* func_name, bool
> retprobe)
> {
>         int ret = 0;
>
>         ret |= poke_kprobe_events(true, func_name, retprobe);
>         ret |= toggle_kprobe_event_legacy_all(true);
>         ret |= toggle_single_kprobe_event_legacy(true, func_name, retprobe);
>
>         return ret;
> }
>
> 1) /sys/kernel/debug/tracing/kprobe_events => 1
> 2) /sys/kernel/debug/tracing/events/kprobes/enable => 1
> 3) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 1

Ok, hold on. I don't think we should use those /enable files,
actually. Double-checking what BCC does ([0]) and my local demo app I
wrote a while ago, we use perf_event_open() to activate kprobe, once
it is created, and that's all that is necessary.

  [0] https://github.com/iovisor/bcc/blob/master/src/cc/libbpf.c#L1046

>
> And toggle off only kprobe_event:
>
> static inline int remove_kprobe_event_legacy(const char* func_name, bool
> retprobe)
> {
>         int ret = 0;
>
>         ret |= toggle_single_kprobe_event_legacy(false, func_name, retprobe);
>         ret |= poke_kprobe_events(false, func_name, retprobe);
>
>         return ret;
> }
>
> 1) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 0
>
> This is working good for my tests.
>
> >
> >>> +static int kprobe_event_normalize(char *newname, size_t size, const char
> >>> *name, bool retprobe)
> >>> +{
> >>> +     int ret = 0;
> >>> +
> >>> +     if (IS_ERR(name))
> >>> +             return -1;
> >>> +
> >>> +     if (retprobe)
> >>> +             ret = snprintf(newname, size, "kprobes/%s_ret", name);
> >>> +     else
> >>> +             ret = snprintf(newname, size, "kprobes/%s", name);
> >>> +
> >>> +     if (ret <= strlen("kprobes/"))
> >>> +             ret = -errno;
> >>> +
> >>> +     return ret;
> >>> +}
> >>> +
> >>> +static int toggle_single_kprobe_event_legacy(bool on, const char *name,
> >>> bool retprobe)
> >
> > don't get why you need this function either...
>
> Because of /sys/kernel/debug/tracing/events/kprobes/%s/enable. I’m
> toggling it to OFF before removing the kprobe in kprobe_events, like
> showed above.

Alright, see above about enable files, it doesn't seem necessary,
actually. You use poke_kprobe_events() to add or remove kprobe to the
kernel. That gives you event_name and its id (from
/sys/kernel/debug/tracing/events/kprobes/%s/id). You then use that id
to create perf_event and activate BPF program:

  struct perf_event_attr attr;
  struct bpf_link* link;
  int fd = -1, err, id;
  FILE* f = NULL;

  err = poke_kprobe_events(true /*add*/, func_name, is_kretprobe);
  if (err) {
    fprintf(stderr, "failed to create kprobe event: %d\n", err);
    return NULL;
  }

  snprintf(
      fname,
      sizeof(fname),
      "/sys/kernel/debug/tracing/events/kprobes/%s/id",
      func_name);
  f = fopen(fname, "r");
  if (!f) {
    fprintf(stderr, "failed to open kprobe id file '%s': %d\n", fname, -errno);
    goto err_out;
  }

  if (fscanf(f, "%d\n", &id) != 1) {
    fprintf(stderr, "failed to read kprobe id from '%s': %d\n", fname, -errno);
    goto err_out;
  }

  fclose(f);
  f = NULL;

  memset(&attr, 0, sizeof(attr));
  attr.size = sizeof(attr);
  attr.config = id;
  attr.type = PERF_TYPE_TRACEPOINT;
  attr.sample_period = 1;
  attr.wakeup_events = 1;

  fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
  if (fd < 0) {
    fprintf(
        stderr,
        "failed to create perf event for kprobe ID %d: %d\n",
        id,
        -errno);
    goto err_out;
  }

  link = bpf_program__attach_perf_event(prog, fd);

And that should be it. It doesn't seem like either BCC or my example
(which I'm sure worked last time) does anything with /enable files and
I'm sure all that works.

[...]

> >>>      return bpf_program__attach_kprobe(prog, retprobe, func_name);
> >>>  }
> >>
> >> I’m assuming this is okay based on your saying of detecting a feature
> >> instead of using the if(x) if(y) approach.
> >>
> >>> @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct
> >>> bpf_object_skeleton *s)
> >>>       free(s->maps);
> >>>       free(s->progs);(),
> >>>       free(s);
> >>> +
> >>> +     remove_kprobe_event_legacy("ip_set_create", false);
> >>> +     remove_kprobe_event_legacy("ip_set_create", true);
> >>
> >> This is the main issue I wanted to show you before continuing.
> >> I cannot remove the kprobe event unless the obj is unloaded.
> >> That is why I have this hard coded here, just because I was
> >> testing. Any thoughts how to cleanup the kprobes without
> >> jeopardising the API too much ?
> >
> > cannot as in it doesn't work for whatever reason? Or what do you mean?
> >
> > I see that you had bpf_link__detach_perf_event_legacy calling
> > remove_kprobe_event_legacy, what didn't work?
> >
>
> I’m sorry for not being very clear here. What happens is that, if I
> try to remove the kprobe_event_legacy() BEFORE:
>
> if (s->progs)
>         bpf_object__detach_skeleton(s);
> if (s->obj)
>         bpf_object__close(*s->obj);
>
> It fails with generic write error on kprobe_events file. I need to
> remove legacy kprobe AFTER object closure. To workaround this on
> my project, and to show you this issue, I have come up with:
>
> void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
> {
>          int i, j;
>          struct probeleft {
>                  char *probename;
>                  bool retprobe;
>          } probesleft[24];
>
>          for (i = 0, j = 0; i < s->prog_cnt; i++) {
>                  struct bpf_link **link = s->progs[i].link;
>                  if ((*link)->legacy.name) {
>                          memset(&probesleft[j], 0, sizeof(struct probeleft));
>                          probesleft[j].probename = strdup((*link)->legacy.name);
>                          probesleft[j].retprobe = (*link)->legacy.retprobe;
>                          j++;
>                  }
>          }
>
>          if (s->progs)
>                  bpf_object__detach_skeleton(s);
>          if (s->obj)
>                  bpf_object__close(*s->obj);
>          free(s->maps);
>          free(s->progs);
>          free(s);
>
>          for (j--; j >= 0; j--) {
>                  remove_kprobe_event_legacy(probesleft[j].probename, probesleft[j].retprobe);
>                  free(probesleft[j].probename);
>          }
> }
>
> Which, of course, is not what I’m suggesting to the lib, but shows
> the problem and gives you a better idea on how to solve it not
> breaking the API.
>

bpf_link__destroy() callback should handle that, no? You'll close perf
event FD, which will "free up" kprobe and you can do
poke_kprobe_events(false /*remove */, ...). Or am I still missing
something?

> > You somehow ended up with 3 times more code and I have more questions
> > now then before. When you say "it doesn't work", please make sure to
> > explain what exactly doesn't work, what you did, what you expected to
> > happen/see.
>
> Deal. Thanks a lot for reviewing all this.
>
> -rafaeldtinoco
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-07 22:33           ` Andrii Nakryiko
@ 2021-04-08 23:59             ` John Fastabend
  2021-04-14 14:30             ` Rafael David Tinoco
  1 sibling, 0 replies; 24+ messages in thread
From: John Fastabend @ 2021-04-08 23:59 UTC (permalink / raw)
  To: Andrii Nakryiko, Rafael David Tinoco; +Cc: LKML BPF

Andrii Nakryiko wrote:
> On Tue, Apr 6, 2021 at 9:49 PM Rafael David Tinoco
> <rafaeldtinoco@ubuntu.com> wrote:
> >
> > Sorry taking so long for replying on this… have been working in:
> > https://github.com/rafaeldtinoco/conntracker/tree/main/ebpf
> > as a consumer for the work being proposed by this patch.
> >
> > Current working version at:
> > https://github.com/rafaeldtinoco/conntracker/blob/main/ebpf/patches/libbpf-introduce-legacy-kprobe-events-support.patch
> > About to be changed with suggestions from this thread.
> >

Just catching up on this thread now.

> > > don't get why you need this function either...
> >
> > Because of /sys/kernel/debug/tracing/events/kprobes/%s/enable. I’m
> > toggling it to OFF before removing the kprobe in kprobe_events, like
> > showed above.
> 
> Alright, see above about enable files, it doesn't seem necessary,
> actually. You use poke_kprobe_events() to add or remove kprobe to the
> kernel. That gives you event_name and its id (from
> /sys/kernel/debug/tracing/events/kprobes/%s/id). You then use that id
> to create perf_event and activate BPF program:
> 
>   struct perf_event_attr attr;
>   struct bpf_link* link;
>   int fd = -1, err, id;
>   FILE* f = NULL;
> 
>   err = poke_kprobe_events(true /*add*/, func_name, is_kretprobe);
>   if (err) {
>     fprintf(stderr, "failed to create kprobe event: %d\n", err);
>     return NULL;
>   }
> 
>   snprintf(
>       fname,
>       sizeof(fname),
>       "/sys/kernel/debug/tracing/events/kprobes/%s/id",
>       func_name);
>   f = fopen(fname, "r");
>   if (!f) {
>     fprintf(stderr, "failed to open kprobe id file '%s': %d\n", fname, -errno);
>     goto err_out;
>   }
> 
>   if (fscanf(f, "%d\n", &id) != 1) {
>     fprintf(stderr, "failed to read kprobe id from '%s': %d\n", fname, -errno);
>     goto err_out;
>   }
> 
>   fclose(f);
>   f = NULL;
> 
>   memset(&attr, 0, sizeof(attr));
>   attr.size = sizeof(attr);
>   attr.config = id;
>   attr.type = PERF_TYPE_TRACEPOINT;
>   attr.sample_period = 1;
>   attr.wakeup_events = 1;
> 
>   fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
>   if (fd < 0) {
>     fprintf(
>         stderr,
>         "failed to create perf event for kprobe ID %d: %d\n",
>         id,
>         -errno);
>     goto err_out;
>   }
> 
>   link = bpf_program__attach_perf_event(prog, fd);
> 
> And that should be it. It doesn't seem like either BCC or my example
> (which I'm sure worked last time) does anything with /enable files and
> I'm sure all that works.

FWIW I also have a similar patch on my stack that does this and was
working fine for us. I've got a note here to submit it, but its
been stuck on the todo list.

I'll post it here maybe its helpful,

+static int write_to_kprobe_events(const char *name,
+                                 uint64_t offset, int pid, bool retprobe)
+{
+       const char *kprobe_events = "/sys/kernel/debug/tracing/kprobe_events";
+       int fd = open(kprobe_events, O_WRONLY | O_APPEND, 0);
+       char buf[PATH_MAX];
+       int err;
+
+       if (fd < 0) {
+               err = -errno;
+               pr_warn("Failed open kprobe_events: %s\n", strerror(errno));
+               return err;
+       }
+       snprintf(buf, sizeof(buf), "%c:kprobes/%s %s",
+                retprobe ? 'r' : 'p', name, name);
+       err = write(fd, buf, strlen(buf));
+       close(fd);
+       if (err < 0) {
+               err = -errno;
+               pr_warn("Failed write kprobe_events: %s\n", strerror(errno));
+               return err;
+       }
+       return 0;
+}
+
+/* If we do not have an event_source/../kprobes then we can try to use
+ * kprobe-base event tracing, for details see documentation kprobetrace.rst
+ */
+static int perf_event_open_probe_debugfs(bool uprobe, bool retprobe, const char *name,
+                                        uint64_t offset, int pid)
+{
+       const char *kprobes_dir = "/sys/kernel/debug/tracing/events/kprobes/";
+       struct perf_event_attr attr = {};
+       char errmsg[STRERR_BUFSIZE];
+       char file[PATH_MAX];
+       int pfd, err, id;
+
+       if (uprobe) {
+               return -EOPNOTSUPP;
+       } else {
+               err = write_to_kprobe_events(name, offset, pid, retprobe);
+               if (err < 0)
+                       return err;
+               err = snprintf(file, sizeof(file), "%s/%s/id", kprobes_dir, name);
+               if (err < 0)
+                       return -errno;
+               id = parse_uint_from_file(file, "%d\n");
+               if (id < 0)
+                       return err;
+               attr.size = sizeof(attr);
+               attr.type = PERF_TYPE_TRACEPOINT;
+               attr.config = id;
+       }
+
+       /* pid filter is meaningful only for uprobes */
+       pfd = syscall(__NR_perf_event_open, &attr,
+                     pid < 0 ? -1 : pid /* pid */,
+                     pid == -1 ? 0 : -1 /* cpu */,
+                     -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+       if (pfd < 0) {
+               err = -errno;
+               pr_warn("%s perf_event_open_probe_debugfs() failed: %s\n",
+                       uprobe ? "uprobe" : "kprobe",
+                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+               return err;
+       }
+       return pfd;
+}

> 
> [...]
> 
> > >>>      return bpf_program__attach_kprobe(prog, retprobe, func_name);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-07 22:33           ` Andrii Nakryiko
  2021-04-08 23:59             ` John Fastabend
@ 2021-04-14 14:30             ` Rafael David Tinoco
  2021-04-14 20:06               ` Rafael David Tinoco
  2021-04-14 23:23               ` Andrii Nakryiko
  1 sibling, 2 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-04-14 14:30 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: LKML BPF

> 
>>> So I don't get at all why you have these toggles, especially
>>> ALL_TOGGLE? You shouldn't try to determine the state of another probe.
>>> You always know whether you want to enable or disable your specific
>>> toggle. I'm very confused by all this.
>> 
>> Yes, this was a confusing thing indeed and to be honest it proved to
>> be very buggy when testing with conntracker. What I’ll do (or I’m
>> doing) is to toggle ON to needed files before the probe is added:
>> 
>> static inline int add_kprobe_event_legacy(const char* func_name, bool
>> retprobe)
>> {
>>        int ret = 0;
>> 
>>        ret |= poke_kprobe_events(true, func_name, retprobe);
>>        ret |= toggle_kprobe_event_legacy_all(true);
>>        ret |= toggle_single_kprobe_event_legacy(true, func_name, retprobe);
>> 
>>        return ret;
>> }
>> 
>> 1) /sys/kernel/debug/tracing/kprobe_events => 1
>> 2) /sys/kernel/debug/tracing/events/kprobes/enable => 1
>> 3) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 1
> 
> Ok, hold on. I don't think we should use those /enable files,
> actually. Double-checking what BCC does ([0]) and my local demo app I
> wrote a while ago, we use perf_event_open() to activate kprobe, once
> it is created, and that's all that is necessary.
> 
>  [0] https://github.com/iovisor/bcc/blob/master/src/cc/libbpf.c#L1046

No, they are not needed. Those are enabling ftrace kprobe feature:

trace_events.c:
    event_create_dir()
        trace_create_file("enable") -> 
            ftrace_enable_fops():
            .write = event_enable_write() -> ftrace_event_enable_disable()

And kprobe perf events works fine without playing with them as long as:
/sys/kernel/debug/tracing/kprobe_events is always 1 (should we enable
it by default or consider it is enabled and don’t change its value ?).

>> 
>> Because of /sys/kernel/debug/tracing/events/kprobes/%s/enable. I’m
>> toggling it to OFF before removing the kprobe in kprobe_events, like
>> showed above.
> 
> Alright, see above about enable files, it doesn't seem necessary,
> actually. You use poke_kprobe_events() to add or remove kprobe to the
> kernel. That gives you event_name and its id (from
> /sys/kernel/debug/tracing/events/kprobes/%s/id). You then use that id
> to create perf_event and activate BPF program:

Yes, with a small reservation I just found out: function names might
change because of GCC optimisations.. In my case I found out that:

# cat /proc/kallsyms | grep udp_send_skb
ffffffff8f9e0090 t udp_send_skb.isra.48

udp_send_skb probe was not always working because the function name
was changed. Then I saw BCC had this issue back in 2018 and is
fixing it now:

https://github.com/iovisor/bcc/issues/1754
https://github.com/iovisor/bcc/pull/2930

So I thought I could do the same: check if function name is the same
in /proc/kallsyms or if it has changed and use the changed name if
needed (to add to kprobe_events).

Will include that logic and remove the ‘enables’.

> 
> And that should be it. It doesn't seem like either BCC or my example
> (which I'm sure worked last time) does anything with /enable files and
> I'm sure all that works.

First comment.

> 
> [...]
> 
>>>>>     return bpf_program__attach_kprobe(prog, retprobe, func_name);
>>>>> }
>>>> 
>>>> I’m assuming this is okay based on your saying of detecting a feature
>>>> instead of using the if(x) if(y) approach.
>>>> 
>>>>> @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct
>>>>> bpf_object_skeleton *s)
>>>>>      free(s->maps);
>>>>>      free(s->progs);(),
>>>>>      free(s);
>>>>> +
>>>>> +     remove_kprobe_event_legacy("ip_set_create", false);
>>>>> +     remove_kprobe_event_legacy("ip_set_create", true);
>>>> 
>>>> This is the main issue I wanted to show you before continuing.
>>>> I cannot remove the kprobe event unless the obj is unloaded.
>>>> That is why I have this hard coded here, just because I was
>>>> testing. Any thoughts how to cleanup the kprobes without
>>>> jeopardising the API too much ?
>>> 
>>> cannot as in it doesn't work for whatever reason? Or what do you mean?
>>> 
>>> I see that you had bpf_link__detach_perf_event_legacy calling
>>> remove_kprobe_event_legacy, what didn't work?
>>> 
>> 
>> I’m sorry for not being very clear here. What happens is that, if I
>> try to remove the kprobe_event_legacy() BEFORE:
>> 
>> if (s->progs)
>>        bpf_object__detach_skeleton(s);
>> if (s->obj)
>>        bpf_object__close(*s->obj);
>> 
>> It fails with generic write error on kprobe_events file. I need to
>> remove legacy kprobe AFTER object closure. To workaround this on
>> my project, and to show you this issue, I have come up with:
>> 
>> void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
>> {
>>         int i, j;
>>         struct probeleft {
>>                 char *probename;
>>                 bool retprobe;
>>         } probesleft[24];
>> 
>>         for (i = 0, j = 0; i < s->prog_cnt; i++) {
>>                 struct bpf_link **link = s->progs[i].link;
>>                 if ((*link)->legacy.name) {
>>                         memset(&probesleft[j], 0, sizeof(struct probeleft));
>>                         probesleft[j].probename = strdup((*link)->legacy.name);
>>                         probesleft[j].retprobe = (*link)->legacy.retprobe;
>>                         j++;
>>                 }
>>         }
>> 
>>         if (s->progs)
>>                 bpf_object__detach_skeleton(s);
>>         if (s->obj)
>>                 bpf_object__close(*s->obj);
>>         free(s->maps);
>>         free(s->progs);
>>         free(s);
>> 
>>         for (j--; j >= 0; j--) {
>>                 remove_kprobe_event_legacy(probesleft[j].probename, probesleft[j].retprobe);
>>                 free(probesleft[j].probename);
>>         }
>> }
>> 
>> Which, of course, is not what I’m suggesting to the lib, but shows
>> the problem and gives you a better idea on how to solve it not
>> breaking the API.
>> 
> 
> bpf_link__destroy() callback should handle that, no? You'll close perf
> event FD, which will "free up" kprobe and you can do
> poke_kprobe_events(false /*remove */, ...). Or am I still missing
> something?

I could only poke_kprobe_events() to remove the kprobe after
bpf_oject__close(), or I would get an I/O error on kprobe_events.
Not sure if after map destroy or program exit.

-rafaeldtinoco


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-14 14:30             ` Rafael David Tinoco
@ 2021-04-14 20:06               ` Rafael David Tinoco
  2021-04-14 23:23               ` Andrii Nakryiko
  1 sibling, 0 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-04-14 20:06 UTC (permalink / raw)
  To: Rafael David Tinoco


> And kprobe perf events works fine without playing with them as long as:
> /sys/kernel/debug/tracing/kprobe_events is always 1 (should we enable
> it by default or consider it is enabled and don’t change its value ?).

Small correction: file /sys/kernel/debug/kprobes/enabled should
be always 1, and not /sys/kernel/debug/tracing/kprobe_events
(obviously).

-rafaeldtinoco

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-14 14:30             ` Rafael David Tinoco
  2021-04-14 20:06               ` Rafael David Tinoco
@ 2021-04-14 23:23               ` Andrii Nakryiko
  2021-04-15  5:53                 ` Rafael David Tinoco
  2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
  1 sibling, 2 replies; 24+ messages in thread
From: Andrii Nakryiko @ 2021-04-14 23:23 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: LKML BPF

On Wed, Apr 14, 2021 at 7:30 AM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
> >
> >>> So I don't get at all why you have these toggles, especially
> >>> ALL_TOGGLE? You shouldn't try to determine the state of another probe.
> >>> You always know whether you want to enable or disable your specific
> >>> toggle. I'm very confused by all this.
> >>
> >> Yes, this was a confusing thing indeed and to be honest it proved to
> >> be very buggy when testing with conntracker. What I’ll do (or I’m
> >> doing) is to toggle ON to needed files before the probe is added:
> >>
> >> static inline int add_kprobe_event_legacy(const char* func_name, bool
> >> retprobe)
> >> {
> >>        int ret = 0;
> >>
> >>        ret |= poke_kprobe_events(true, func_name, retprobe);
> >>        ret |= toggle_kprobe_event_legacy_all(true);
> >>        ret |= toggle_single_kprobe_event_legacy(true, func_name, retprobe);
> >>
> >>        return ret;
> >> }
> >>
> >> 1) /sys/kernel/debug/tracing/kprobe_events => 1
> >> 2) /sys/kernel/debug/tracing/events/kprobes/enable => 1
> >> 3) /sys/kernel/debug/tracing/events/kprobes/%s/enable => 1
> >
> > Ok, hold on. I don't think we should use those /enable files,
> > actually. Double-checking what BCC does ([0]) and my local demo app I
> > wrote a while ago, we use perf_event_open() to activate kprobe, once
> > it is created, and that's all that is necessary.
> >
> >  [0] https://github.com/iovisor/bcc/blob/master/src/cc/libbpf.c#L1046
>
> No, they are not needed. Those are enabling ftrace kprobe feature:
>
> trace_events.c:
>     event_create_dir()
>         trace_create_file("enable") ->
>             ftrace_enable_fops():
>             .write = event_enable_write() -> ftrace_event_enable_disable()
>
> And kprobe perf events works fine without playing with them as long as:
> /sys/kernel/debug/tracing/kprobe_events is always 1 (should we enable
> it by default or consider it is enabled and don’t change its value ?).

I think considering it enabled is the right call, given that's what BCC does.

>
> >>
> >> Because of /sys/kernel/debug/tracing/events/kprobes/%s/enable. I’m
> >> toggling it to OFF before removing the kprobe in kprobe_events, like
> >> showed above.
> >
> > Alright, see above about enable files, it doesn't seem necessary,
> > actually. You use poke_kprobe_events() to add or remove kprobe to the
> > kernel. That gives you event_name and its id (from
> > /sys/kernel/debug/tracing/events/kprobes/%s/id). You then use that id
> > to create perf_event and activate BPF program:
>
> Yes, with a small reservation I just found out: function names might
> change because of GCC optimisations.. In my case I found out that:
>
> # cat /proc/kallsyms | grep udp_send_skb
> ffffffff8f9e0090 t udp_send_skb.isra.48
>
> udp_send_skb probe was not always working because the function name
> was changed. Then I saw BCC had this issue back in 2018 and is
> fixing it now:
>
> https://github.com/iovisor/bcc/issues/1754
> https://github.com/iovisor/bcc/pull/2930
>
> So I thought I could do the same: check if function name is the same
> in /proc/kallsyms or if it has changed and use the changed name if
> needed (to add to kprobe_events).
>
> Will include that logic and remove the ‘enables’.

No, please stop adding arbitrary additions. Function renames, .isra
optimizations, etc - that's all concerns of higher level, this API
should not try to be smart. It should try to attach to exactly the
kprobe specified.

>
> >
> > And that should be it. It doesn't seem like either BCC or my example
> > (which I'm sure worked last time) does anything with /enable files and
> > I'm sure all that works.
>
> First comment.
>
> >
> > [...]
> >
> >>>>>     return bpf_program__attach_kprobe(prog, retprobe, func_name);
> >>>>> }
> >>>>
> >>>> I’m assuming this is okay based on your saying of detecting a feature
> >>>> instead of using the if(x) if(y) approach.
> >>>>
> >>>>> @@ -11280,4 +11629,7 @@ void bpf_object__destroy_skeleton(struct
> >>>>> bpf_object_skeleton *s)
> >>>>>      free(s->maps);
> >>>>>      free(s->progs);(),
> >>>>>      free(s);
> >>>>> +
> >>>>> +     remove_kprobe_event_legacy("ip_set_create", false);
> >>>>> +     remove_kprobe_event_legacy("ip_set_create", true);
> >>>>
> >>>> This is the main issue I wanted to show you before continuing.
> >>>> I cannot remove the kprobe event unless the obj is unloaded.
> >>>> That is why I have this hard coded here, just because I was
> >>>> testing. Any thoughts how to cleanup the kprobes without
> >>>> jeopardising the API too much ?
> >>>
> >>> cannot as in it doesn't work for whatever reason? Or what do you mean?
> >>>
> >>> I see that you had bpf_link__detach_perf_event_legacy calling
> >>> remove_kprobe_event_legacy, what didn't work?
> >>>
> >>
> >> I’m sorry for not being very clear here. What happens is that, if I
> >> try to remove the kprobe_event_legacy() BEFORE:
> >>
> >> if (s->progs)
> >>        bpf_object__detach_skeleton(s);
> >> if (s->obj)
> >>        bpf_object__close(*s->obj);
> >>
> >> It fails with generic write error on kprobe_events file. I need to
> >> remove legacy kprobe AFTER object closure. To workaround this on
> >> my project, and to show you this issue, I have come up with:
> >>
> >> void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
> >> {
> >>         int i, j;
> >>         struct probeleft {
> >>                 char *probename;
> >>                 bool retprobe;
> >>         } probesleft[24];
> >>
> >>         for (i = 0, j = 0; i < s->prog_cnt; i++) {
> >>                 struct bpf_link **link = s->progs[i].link;
> >>                 if ((*link)->legacy.name) {
> >>                         memset(&probesleft[j], 0, sizeof(struct probeleft));
> >>                         probesleft[j].probename = strdup((*link)->legacy.name);
> >>                         probesleft[j].retprobe = (*link)->legacy.retprobe;
> >>                         j++;
> >>                 }
> >>         }
> >>
> >>         if (s->progs)
> >>                 bpf_object__detach_skeleton(s);
> >>         if (s->obj)
> >>                 bpf_object__close(*s->obj);
> >>         free(s->maps);
> >>         free(s->progs);
> >>         free(s);
> >>
> >>         for (j--; j >= 0; j--) {
> >>                 remove_kprobe_event_legacy(probesleft[j].probename, probesleft[j].retprobe);
> >>                 free(probesleft[j].probename);
> >>         }
> >> }
> >>
> >> Which, of course, is not what I’m suggesting to the lib, but shows
> >> the problem and gives you a better idea on how to solve it not
> >> breaking the API.
> >>
> >
> > bpf_link__destroy() callback should handle that, no? You'll close perf
> > event FD, which will "free up" kprobe and you can do
> > poke_kprobe_events(false /*remove */, ...). Or am I still missing
> > something?
>
> I could only poke_kprobe_events() to remove the kprobe after
> bpf_oject__close(), or I would get an I/O error on kprobe_events.
> Not sure if after map destroy or program exit.

Did you figure out why? What's causing an error?

>
> -rafaeldtinoco
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-14 23:23               ` Andrii Nakryiko
@ 2021-04-15  5:53                 ` Rafael David Tinoco
  2021-04-15 22:48                   ` Andrii Nakryiko
  2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
  1 sibling, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-04-15  5:53 UTC (permalink / raw)
  To: Andrii Nakryiko


>> Yes, with a small reservation I just found out: function names might
>> change because of GCC optimisations.. In my case I found out that:
>> 
>> # cat /proc/kallsyms | grep udp_send_skb
>> ffffffff8f9e0090 t udp_send_skb.isra.48
>> 
>> udp_send_skb probe was not always working because the function name
>> was changed. Then I saw BCC had this issue back in 2018 and is
>> fixing it now:
>> 
>> https://github.com/iovisor/bcc/issues/1754
>> https://github.com/iovisor/bcc/pull/2930
>> 
>> So I thought I could do the same: check if function name is the same
>> in /proc/kallsyms or if it has changed and use the changed name if
>> needed (to add to kprobe_events).
>> 
>> Will include that logic and remove the ‘enables’.
> 
> No, please stop adding arbitrary additions. Function renames, .isra
> optimizations, etc - that's all concerns of higher level, this API
> should not try to be smart. It should try to attach to exactly the
> kprobe specified.

:\ how can this be done in a higher level if it needs to be done
runtime at the time the event is being enabled ? skel will contain
hardcoded kprobe names and won’t be able to get runtime optimised
symbol names, correct ? (unless bpftool gen generates an intermediate
code to check kallsyms and solve those symbols before calling the lib).

I see BCC has some options for regexing symbol names for the probes… 
obviously in BCC’s case is simpler. 

I made it work by checking kallsyms for the exact symbol and,
if not found, for the variants (only for the legacy kprobe event
probe, but it would work for the current one, passing discovered
symbol name for the ioctl attrs. My WIP version (to clarify what I’m
saying):

https://paste.ubuntu.com/p/DpqDsGdVff/

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support
  2021-04-15  5:53                 ` Rafael David Tinoco
@ 2021-04-15 22:48                   ` Andrii Nakryiko
  0 siblings, 0 replies; 24+ messages in thread
From: Andrii Nakryiko @ 2021-04-15 22:48 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Wed, Apr 14, 2021 at 10:53 PM Rafael David Tinoco
<rafaeldtinoco@ubuntu.com> wrote:
>
>
> >> Yes, with a small reservation I just found out: function names might
> >> change because of GCC optimisations.. In my case I found out that:
> >>
> >> # cat /proc/kallsyms | grep udp_send_skb
> >> ffffffff8f9e0090 t udp_send_skb.isra.48
> >>
> >> udp_send_skb probe was not always working because the function name
> >> was changed. Then I saw BCC had this issue back in 2018 and is
> >> fixing it now:
> >>
> >> https://github.com/iovisor/bcc/issues/1754
> >> https://github.com/iovisor/bcc/pull/2930
> >>
> >> So I thought I could do the same: check if function name is the same
> >> in /proc/kallsyms or if it has changed and use the changed name if
> >> needed (to add to kprobe_events).
> >>
> >> Will include that logic and remove the ‘enables’.
> >
> > No, please stop adding arbitrary additions. Function renames, .isra
> > optimizations, etc - that's all concerns of higher level, this API
> > should not try to be smart. It should try to attach to exactly the
> > kprobe specified.
>
> :\ how can this be done in a higher level if it needs to be done
> runtime at the time the event is being enabled ? skel will contain
> hardcoded kprobe names and won’t be able to get runtime optimised
> symbol names, correct ? (unless bpftool gen generates an intermediate
> code to check kallsyms and solve those symbols before calling the lib).

user-space code can specify whichever kernel function name it needs
with direct call to bpf_program__attach_kprobe().
bpf_program__attach_kprobe() should attempt to attach *exactly* to the
function specified, even if it doesn't exist. That's not the place to
make any substitutions, otherwise that API will become an unreliable
mess.

>
> I see BCC has some options for regexing symbol names for the probes…
> obviously in BCC’s case is simpler.

That regexing can be built on top of bpf_program__attach_kprobe(), if
necessary. If user says 'attach to X', API should attach to "X", not
"X.something".

>
> I made it work by checking kallsyms for the exact symbol and,
> if not found, for the variants (only for the legacy kprobe event
> probe, but it would work for the current one, passing discovered
> symbol name for the ioctl attrs. My WIP version (to clarify what I’m
> saying):
>
> https://paste.ubuntu.com/p/DpqDsGdVff/

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-04-14 23:23               ` Andrii Nakryiko
  2021-04-15  5:53                 ` Rafael David Tinoco
@ 2021-06-25  4:44                 ` Rafael David Tinoco
  2021-06-25  5:01                   ` Rafael David Tinoco
                                     ` (2 more replies)
  1 sibling, 3 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-06-25  4:44 UTC (permalink / raw)
  To: bpf; +Cc: rafaeldtinoco, andrii.nakryiko

Allow kprobe tracepoint events creation through legacy interface, as the
kprobe dynamic PMUs support, used by default, was only created in v4.17.

This enables CO.RE support for older kernels.

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>
---
 tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 123 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1e04ce724240..72a22c4d8295 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10007,6 +10007,10 @@ struct bpf_link {
 	char *pin_path;		/* NULL, if not pinned */
 	int fd;			/* hook FD, -1 if not applicable */
 	bool disconnected;
+	struct {
+		char *name;
+		bool retprobe;
+	} legacy;
 };
 
 /* Replace link's underlying BPF program with the new one */
@@ -10143,6 +10147,47 @@ int bpf_link__unpin(struct bpf_link *link)
 	return 0;
 }
 
+static int poke_kprobe_events(bool add, const char *name, bool retprobe)
+{
+	int fd, ret = 0;
+	char probename[32], cmd[160];
+	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
+
+	memset(probename, 0, sizeof(probename));
+
+	if (retprobe)
+		ret = snprintf(probename, sizeof(probename), "kprobes/%s_ret", name);
+	else
+		ret = snprintf(probename, sizeof(probename), "kprobes/%s", name);
+
+	if (ret <= strlen("kprobes/"))
+		return -EINVAL;
+
+	if (add)
+		snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p', probename, name);
+	else
+		snprintf(cmd, sizeof(cmd), "-:%s", probename);
+
+	if (!(fd = open(file, O_WRONLY|O_APPEND, 0)))
+		return -errno;
+	if ((ret = write(fd, cmd, strlen(cmd))) < 0)
+		ret = -errno;
+
+	close(fd);
+
+	return ret;
+}
+
+static inline int add_kprobe_event_legacy(const char* name, bool retprobe)
+{
+	return poke_kprobe_events(true, name, retprobe);
+}
+
+static inline int remove_kprobe_event_legacy(const char* name, bool retprobe)
+{
+	return poke_kprobe_events(false, name, retprobe);
+}
+
 static int bpf_link__detach_perf_event(struct bpf_link *link)
 {
 	int err;
@@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
 		err = -errno;
 
 	close(link->fd);
+
+	if (link->legacy.name) {
+		remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
+		free(link->legacy.name);
+	}
+
 	return libbpf_err(err);
 }
 
@@ -10229,6 +10280,23 @@ static int parse_uint_from_file(const char *file, const char *fmt)
 	return ret;
 }
 
+static bool determine_kprobe_legacy(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return access(file, 0) == 0 ? false : true;
+}
+
+static int determine_kprobe_perf_type_legacy(const char *func_name)
+{
+	char file[96];
+	const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
+
+	snprintf(file, sizeof(file), fname, func_name);
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
 static int determine_kprobe_perf_type(void)
 {
 	const char *file = "/sys/bus/event_source/devices/kprobe/type";
@@ -10304,6 +10372,43 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
+static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
+					uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	if (uprobe) // unsupported
+		return -EINVAL;
+
+	if ((err = add_kprobe_event_legacy(name, retprobe)) < 0) {
+		pr_warn("failed to add legacy kprobe event: %s\n",
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	if ((type = determine_kprobe_perf_type_legacy(name)) < 0) {
+		pr_warn("failed to determine legacy kprobe event id: %s\n",
+		libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	attr.size = sizeof(attr);
+	attr.config = type;
+	attr.type = PERF_TYPE_TRACEPOINT;
+
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid, /* pid */
+		      pid == -1 ? 0 : -1, /* cpu */
+		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 					    bool retprobe,
 					    const char *func_name)
@@ -10311,9 +10416,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int pfd, err;
+	bool legacy = false;
 
-	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
-				    0 /* offset */, -1 /* pid */);
+	if (!(legacy = determine_kprobe_legacy()))
+		pfd = perf_event_open_probe(false /* uprobe */,
+					    retprobe, func_name,
+					     0 /* offset */,
+					    -1 /* pid */);
+	else
+		pfd = perf_event_open_probe_legacy(false /* uprobe */,
+					    retprobe, func_name,
+					     0 /* offset */,
+					    -1 /* pid */);
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
@@ -10329,6 +10443,13 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(err);
 	}
+
+	if (legacy) {
+		/* needed history for the legacy probe cleanup */
+		link->legacy.name = strdup(func_name);
+		link->legacy.retprobe = retprobe;
+	}
+
 	return link;
 }
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
@ 2021-06-25  5:01                   ` Rafael David Tinoco
  2021-07-07 13:38                   ` Rafael David Tinoco
  2021-07-07 21:52                   ` Andrii Nakryiko
  2 siblings, 0 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-06-25  5:01 UTC (permalink / raw)
  To: LKML BPF; +Cc: Andrii Nakryiko, Rafael David Tinoco


> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
> 
> This enables CO.RE support for older kernels.
> 
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>

Related to:
https://github.com/libbpf/libbpf/issues/317

> ---
> tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 123 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 1e04ce724240..72a22c4d8295 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c

[snip]

> static int bpf_link__detach_perf_event(struct bpf_link *link)
> {
> 	int err;
> @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> 		err = -errno;
> 
> 	close(link->fd);

It needed the perf event fd closure for the ‘kprobe_events’ to allow releasing.

> +
> +	if (link->legacy.name) {
> +		remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> +		free(link->legacy.name);
> +	}
> +
> 	return libbpf_err(err);
> }

[snip]

Tested with: https://github.com/rafaeldtinoco/portablebpf (w/ CO.RE) in kernels 5.8 and 4.15.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
  2021-06-25  5:01                   ` Rafael David Tinoco
@ 2021-07-07 13:38                   ` Rafael David Tinoco
  2021-07-07 21:52                   ` Andrii Nakryiko
  2 siblings, 0 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-07-07 13:38 UTC (permalink / raw)
  To: bpf; +Cc: andrii.nakryiko

On Fri, Jun 25, 2021, at 01:44, Rafael David Tinoco wrote:
> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
> 
> This enables CO.RE support for older kernels.
> 
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>

ping

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
  2021-06-25  5:01                   ` Rafael David Tinoco
  2021-07-07 13:38                   ` Rafael David Tinoco
@ 2021-07-07 21:52                   ` Andrii Nakryiko
  2021-07-19  1:59                     ` Rafael David Tinoco
  2 siblings, 1 reply; 24+ messages in thread
From: Andrii Nakryiko @ 2021-07-07 21:52 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Thu, Jun 24, 2021 at 9:45 PM Rafael David Tinoco
<rafaeldtinoco@gmail.com> wrote:
>
> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
>
> This enables CO.RE support for older kernels.
>
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>
> ---

Sorry for the delay, I've been out on vacation for the last two weeks.

>  tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 123 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 1e04ce724240..72a22c4d8295 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10007,6 +10007,10 @@ struct bpf_link {
>         char *pin_path;         /* NULL, if not pinned */
>         int fd;                 /* hook FD, -1 if not applicable */
>         bool disconnected;
> +       struct {
> +               char *name;
> +               bool retprobe;
> +       } legacy;

we shouldn't extend common bpf_link with kprobe-specific parts. We
used to have something like this (for other use cases):

struct bpf_link_kprobe {
    struct bpf_link link;
    char *legacy_name;
    bool is_retprobe;
};

And then internally do container_of() to "cast" struct bpf_link to
struct bpf_link_kprobe. External API should still operate on struct
bpf_link everywhere.

>  };
>
>  /* Replace link's underlying BPF program with the new one */
> @@ -10143,6 +10147,47 @@ int bpf_link__unpin(struct bpf_link *link)
>         return 0;
>  }
>
> +static int poke_kprobe_events(bool add, const char *name, bool retprobe)
> +{
> +       int fd, ret = 0;
> +       char probename[32], cmd[160];
> +       const char *file = "/sys/kernel/debug/tracing/kprobe_events";
> +
> +       memset(probename, 0, sizeof(probename));

nit: char probename[32] = {} instead of explicit memset

32 seems pretty small, though, is that kernel restriction? if not,
let's maybe bump it to 128 or so?

> +
> +       if (retprobe)
> +               ret = snprintf(probename, sizeof(probename), "kprobes/%s_ret", name);
> +       else
> +               ret = snprintf(probename, sizeof(probename), "kprobes/%s", name);

BCC seems to be adding _bcc_<pid> prefix, so maybe let's do the same
here with s/bcc/libbpf/?

> +
> +       if (ret <= strlen("kprobes/"))
> +               return -EINVAL;

why would snprintf() fail at all?

> +
> +       if (add)
> +               snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p', probename, name);

missing space before "

> +       else
> +               snprintf(cmd, sizeof(cmd), "-:%s", probename);
> +
> +       if (!(fd = open(file, O_WRONLY|O_APPEND, 0)))

let's not do these assignments inside if, we are not trying to save
lines of code here, but it's harder to read

also need spaces around operator |

> +               return -errno;
> +       if ((ret = write(fd, cmd, strlen(cmd))) < 0)
> +               ret = -errno;
> +
> +       close(fd);
> +
> +       return ret;
> +}
> +
> +static inline int add_kprobe_event_legacy(const char* name, bool retprobe)

char *name

> +{
> +       return poke_kprobe_events(true, name, retprobe);
> +}
> +
> +static inline int remove_kprobe_event_legacy(const char* name, bool retprobe)

char *name

> +{
> +       return poke_kprobe_events(false, name, retprobe);
> +}
> +
>  static int bpf_link__detach_perf_event(struct bpf_link *link)
>  {
>         int err;
> @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
>                 err = -errno;
>
>         close(link->fd);
> +
> +       if (link->legacy.name) {
> +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> +               free(link->legacy.name);
> +       }

instead of this check in bpf_link__detach_perf_event, attach_kprobe
should install its own bpf_link__detach_kprobe_legacy callback

> +
>         return libbpf_err(err);
>  }
>
> @@ -10229,6 +10280,23 @@ static int parse_uint_from_file(const char *file, const char *fmt)
>         return ret;
>  }
>
> +static bool determine_kprobe_legacy(void)
> +{
> +       const char *file = "/sys/bus/event_source/devices/kprobe/type";
> +
> +       return access(file, 0) == 0 ? false : true;
> +}
> +
> +static int determine_kprobe_perf_type_legacy(const char *func_name)
> +{
> +       char file[96];
> +       const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
> +
> +       snprintf(file, sizeof(file), fname, func_name);
> +
> +       return parse_uint_from_file(file, "%d\n");
> +}
> +
>  static int determine_kprobe_perf_type(void)
>  {
>         const char *file = "/sys/bus/event_source/devices/kprobe/type";
> @@ -10304,6 +10372,43 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>         return pfd;
>  }
>
> +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> +                                       uint64_t offset, int pid)

you are not using offset here, let's pass it into
add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
%s+123" in poke_kprobe_events? There are separate patches that are
adding ability to attach kprobe at offset, so let's support that
(internally) from the get go for legacy case as well.

also, it's not generic perf_event_open, it's specifically kprobe, so
let's call it with kprobe in the name (e.g., kprobe_open_legacy or
something)

> +{
> +       struct perf_event_attr attr = {};
> +       char errmsg[STRERR_BUFSIZE];
> +       int type, pfd, err;
> +
> +       if (uprobe) // unsupported

please don't use C++ style comments

> +               return -EINVAL;

was about to suggest using -ENOTSUP instead, but we shouldn't even
pass bool uprobe, as it's not yet used in uprobes. We can add it
later, if necessary.

> +
> +       if ((err = add_kprobe_event_legacy(name, retprobe)) < 0) {

same, here and below, let's not do unnecessary assignments inside the if clause

> +               pr_warn("failed to add legacy kprobe event: %s\n",
> +               libbpf_strerror_r(err, errmsg, sizeof(errmsg)));

align with the "failed ..." argument, it looks like independent
statement, not an argument

> +               return err;
> +       }
> +       if ((type = determine_kprobe_perf_type_legacy(name)) < 0) {
> +               pr_warn("failed to determine legacy kprobe event id: %s\n",
> +               libbpf_strerror_r(type, errmsg, sizeof(errmsg)));

same as above

> +               return type;
> +       }
> +       attr.size = sizeof(attr);
> +       attr.config = type;
> +       attr.type = PERF_TYPE_TRACEPOINT;
> +
> +       pfd = syscall(__NR_perf_event_open, &attr,
> +                     pid < 0 ? -1 : pid, /* pid */
> +                     pid == -1 ? 0 : -1, /* cpu */
> +                     -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
> +       if (pfd < 0) {
> +               err = -errno;
> +               pr_warn("legacy kprobe perf_event_open() failed: %s\n",
> +                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +               return err;
> +       }
> +       return pfd;
> +}
> +
>  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                                             bool retprobe,
>                                             const char *func_name)
> @@ -10311,9 +10416,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>         char errmsg[STRERR_BUFSIZE];
>         struct bpf_link *link;
>         int pfd, err;
> +       bool legacy = false;

no need to initialize it, you unconditionally assign it below

>
> -       pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
> -                                   0 /* offset */, -1 /* pid */);
> +       if (!(legacy = determine_kprobe_legacy()))

another assignment in if

> +               pfd = perf_event_open_probe(false /* uprobe */,
> +                                           retprobe, func_name,
> +                                            0 /* offset */,
> +                                           -1 /* pid */);
> +       else
> +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> +                                           retprobe, func_name,
> +                                            0 /* offset */,
> +                                           -1 /* pid */);
>         if (pfd < 0) {
>                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
>                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,

we can't use bpf_program__attach_perf_event as is now, because we need
to allocate a different struct bpf_link_kprobe. Let's extract the
PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
and use it from both bpf_program__attach_perf_event and
bpf_program__attach_kprobe. It's actually good because we can check
silly errors (like prog_fd < 0) before we create perf_event FD now.

> @@ -10329,6 +10443,13 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(err);
>         }
> +
> +       if (legacy) {
> +               /* needed history for the legacy probe cleanup */
> +               link->legacy.name = strdup(func_name);
> +               link->legacy.retprobe = retprobe;
> +       }
> +
>         return link;
>  }
>
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-07-07 21:52                   ` Andrii Nakryiko
@ 2021-07-19  1:59                     ` Rafael David Tinoco
  2021-07-20  0:10                       ` Andrii Nakryiko
  0 siblings, 1 reply; 24+ messages in thread
From: Rafael David Tinoco @ 2021-07-19  1:59 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: bpf

> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -10007,6 +10007,10 @@ struct bpf_link {
> >         char *pin_path;         /* NULL, if not pinned */
> >         int fd;                 /* hook FD, -1 if not applicable */
> >         bool disconnected;
> > +       struct {
> > +               char *name;
> > +               bool retprobe;
> > +       } legacy;
>
> we shouldn't extend common bpf_link with kprobe-specific parts. We
> used to have something like this (for other use cases):
>
> struct bpf_link_kprobe {
>     struct bpf_link link;
>     char *legacy_name;
>     bool is_retprobe;
> };

would this:

struct bpf_link {
    int (*detach)(struct bpf_link *link);
    int (*destroy)(struct bpf_link *link);
    char *pin_path;
    int fd;
    bool disconnected;
};

struct bpf_link_kprobe {
    char *legacy_name;
    bool is_retprobe;
    struct bpf_link *link;
};

be ok ?

> And then internally do container_of() to "cast" struct bpf_link to
> struct bpf_link_kprobe. External API should still operate on struct
> bpf_link everywhere.

and what about this:

static struct bpf_link*
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
                                const char *func_name,
                                struct bpf_program_attach_kprobe_opts *opts)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link_kprobe *kplink;
	int pfd, err;
	bool legacy;

	legacy = determine_kprobe_legacy();
	if (!legacy) {
		pfd = perf_event_open_probe(false /* uprobe */,
		                            opts->retprobe,
		                            func_name,
		                            0 /* offset */,
		                            -1 /* pid */);
	} else {
		pfd = perf_event_open_kprobe_legacy(opts->retprobe,
		                                    func_name,
		                                    0 /* offset */,
		                                    -1 /* pid */);
	}
	if (pfd < 0) {
		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
	err = libbpf_get_error(link);
	if (err) {
		free(kplink);
		close(pfd);
		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(err);
	}
	if (legacy) {
		kplink->legacy_name = strdup(func_name);
		kplink->is_retprobe = opts->retprobe;
	}

	return kplink->link;
}

And use this 'kplink->link' pointer as the bpf_link pointer for all kprobe
functions. For the detachment we would have something like:

static int bpf_link__detach_perf_event(struct bpf_link *link) {
	struct bpf_link *const *plink;
	struct bpf_link_kprobe *kplink;
	int err;

	plink = (struct bpf_link *const *) link;
	kplink = container_of(plink, struct bpf_link_kprobe, link);
	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
	if (err)
		err = -errno;
	close(link->fd);
	if (kplink) {
		remove_kprobe_event_legacy(kplink->legacy_name, kplink->is_retprobe);
		free(kplink->legacy_name);
		free(kplink);
	}

	return libbpf_err(err);
}

for the bpf_link__detach_perf_event(): This would also clean the container at
the detachment. (Next comment talks about having this here versus having a
legacy kprobe detachment callback).

[snip]

> >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> >  {
> >         int err;
> > @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> >                 err = -errno;
> >
> >         close(link->fd);
> > +
> > +       if (link->legacy.name) {
> > +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> > +               free(link->legacy.name);
> > +       }
>
> instead of this check in bpf_link__detach_perf_event, attach_kprobe
> should install its own bpf_link__detach_kprobe_legacy callback

attach_kprobe_opts() could pass a pointer to link->detach->callback through the
opts I suppose (or now, the kplink->link->detach->callback). This way the
default would still be bpf_link__detach_perf_event() but we could create a
function bpf_link__detach_perf_event_legacy_kprobe() with what was previously
showed (about kplink freeing). This is not needed with the version showed
before the [snip] though.

> > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> > +                                       uint64_t offset, int pid)
>
> you are not using offset here, let's pass it into
> add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
> %s+123" in poke_kprobe_events? There are separate patches that are
> adding ability to attach kprobe at offset, so let's support that
> (internally) from the get go for legacy case as well.
>
> also, it's not generic perf_event_open, it's specifically kprobe, so
> let's call it with kprobe in the name (e.g., kprobe_open_legacy or
> something)

I'm calling it now perf_event_open_kprobe_legacy() and it calls:

static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
{
	return poke_kprobe_events(true, name, retprobe, offset);
}

and then we set {kprobes/kretprobes}/funcname_pid, also supporting offset:

static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) {
	int fd, ret = 0;
	char cmd[192] = {}, probename[128] = {}, probefunc[128] = {};
	const char *file = "/sys/kernel/debug/tracing/kprobe_events";

	if (retprobe)
		ret = snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, getpid());
	else
		ret = snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, getpid());
	if (offset)
		ret = snprintf(probefunc, sizeof(probefunc), "%s+%lu", name, offset);
	if (ret)
		return -EINVAL;
	if (add) {
		snprintf(cmd, sizeof(cmd), "%c:%s %s",
				 retprobe ? 'r' : 'p',
				 probename,
		         offset ? probefunc : name);
	} else {
		snprintf(cmd, sizeof(cmd), "-:%s", probename);
	}
	fd = open(file, O_WRONLY | O_APPEND, 0);
	if (!fd)
		return -errno;
	ret = write(fd, cmd, strlen(cmd));
	if (ret)
		ret = -errno;
	close(fd);

	return ret;
}

[snip]

> > +               pfd = perf_event_open_probe(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> > +       else
> > +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> >         if (pfd < 0) {
> >                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
> >                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
>
> we can't use bpf_program__attach_perf_event as is now, because we need
> to allocate a different struct bpf_link_kprobe.

We could do the container encapsulation using heap in
bpf_program_attach_kprobe_opts() or attach_kprobe() like I'm showing here, no ?

...
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
...

and then free all this structure (bpf_link and its encapsulation at the
detachment, like said previously also). This way we don't have to change
bpf_program__attach_perf_event() which would continue to serve
bpf_program__attach_tracepoint() and bpf_program__attach_uprobe() unmodified.
This way, kprobe would have a container for all cases and uprobe and tracepoint
could have a container in the future if needed.

> Let's extract the
> PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
> and use it from both bpf_program__attach_perf_event and
> bpf_program__attach_kprobe. It's actually good because we can check
> silly errors (like prog_fd < 0) before we create perf_event FD now.

Okay, but I'm considering this orthogonal to what you said previously (on
changing bpf_program__attach_perf_event). UNLESS you really prefer me to do the
container allocation in bpf_program__attach_perf_event() but then we would have
to free the container in all detachments (kprobe, tracepoint and uprobe) as it
couldn't be placed in stack (or it would eventually be lost, no ?).

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-07-19  1:59                     ` Rafael David Tinoco
@ 2021-07-20  0:10                       ` Andrii Nakryiko
  2021-07-20  4:16                         ` Rafael David Tinoco
  0 siblings, 1 reply; 24+ messages in thread
From: Andrii Nakryiko @ 2021-07-20  0:10 UTC (permalink / raw)
  To: Rafael David Tinoco; +Cc: bpf

On Sun, Jul 18, 2021 at 6:59 PM Rafael David Tinoco
<rafaeldtinoco@gmail.com> wrote:
>
> > > --- a/tools/lib/bpf/libbpf.c
> > > +++ b/tools/lib/bpf/libbpf.c
> > > @@ -10007,6 +10007,10 @@ struct bpf_link {
> > >         char *pin_path;         /* NULL, if not pinned */
> > >         int fd;                 /* hook FD, -1 if not applicable */
> > >         bool disconnected;
> > > +       struct {
> > > +               char *name;
> > > +               bool retprobe;
> > > +       } legacy;
> >
> > we shouldn't extend common bpf_link with kprobe-specific parts. We
> > used to have something like this (for other use cases):
> >
> > struct bpf_link_kprobe {
> >     struct bpf_link link;
> >     char *legacy_name;
> >     bool is_retprobe;
> > };
>
> would this:
>
> struct bpf_link {
>     int (*detach)(struct bpf_link *link);
>     int (*destroy)(struct bpf_link *link);
>     char *pin_path;
>     int fd;
>     bool disconnected;
> };
>
> struct bpf_link_kprobe {
>     char *legacy_name;
>     bool is_retprobe;
>     struct bpf_link *link;
> };
>
> be ok ?

No.

>
> > And then internally do container_of() to "cast" struct bpf_link to
> > struct bpf_link_kprobe. External API should still operate on struct
> > bpf_link everywhere.
>
> and what about this:
>
> static struct bpf_link*
> bpf_program__attach_kprobe_opts(struct bpf_program *prog,
>                                 const char *func_name,
>                                 struct bpf_program_attach_kprobe_opts *opts)
> {
>         char errmsg[STRERR_BUFSIZE];
>         struct bpf_link_kprobe *kplink;
>         int pfd, err;
>         bool legacy;
>
>         legacy = determine_kprobe_legacy();
>         if (!legacy) {
>                 pfd = perf_event_open_probe(false /* uprobe */,
>                                             opts->retprobe,
>                                             func_name,
>                                             0 /* offset */,
>                                             -1 /* pid */);
>         } else {
>                 pfd = perf_event_open_kprobe_legacy(opts->retprobe,
>                                                     func_name,
>                                                     0 /* offset */,
>                                                     -1 /* pid */);
>         }
>         if (pfd < 0) {
>                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
>                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
>                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(pfd);
>         }
>         kplink = calloc(1, sizeof(struct bpf_link_kprobe));
>         if (!kplink)
>                 return libbpf_err_ptr(-ENOMEM);
>         kplink->link = bpf_program__attach_perf_event(prog, pfd);
>         err = libbpf_get_error(link);
>         if (err) {
>                 free(kplink);
>                 close(pfd);
>                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
>                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
>                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(err);
>         }
>         if (legacy) {
>                 kplink->legacy_name = strdup(func_name);
>                 kplink->is_retprobe = opts->retprobe;
>         }
>
>         return kplink->link;
> }
>
> And use this 'kplink->link' pointer as the bpf_link pointer for all kprobe
> functions. For the detachment we would have something like:
>
> static int bpf_link__detach_perf_event(struct bpf_link *link) {
>         struct bpf_link *const *plink;
>         struct bpf_link_kprobe *kplink;
>         int err;
>
>         plink = (struct bpf_link *const *) link;
>         kplink = container_of(plink, struct bpf_link_kprobe, link);

Did you check if this works? Also how that could ever even work for
non-kprobe but perf_event-based cases, like tracepoint? And why are we
even discussing these "alternatives"? What's wrong with the way I
proposed earlier? For container_of() to work you have to do it the way
I described in my previous email with one struct being embedded in
another one.

>         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
>         if (err)
>                 err = -errno;
>         close(link->fd);
>         if (kplink) {
>                 remove_kprobe_event_legacy(kplink->legacy_name, kplink->is_retprobe);
>                 free(kplink->legacy_name);
>                 free(kplink);
>         }
>
>         return libbpf_err(err);
> }
>
> for the bpf_link__detach_perf_event(): This would also clean the container at
> the detachment. (Next comment talks about having this here versus having a
> legacy kprobe detachment callback).
>
> [snip]
>
> > >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> > >  {
> > >         int err;
> > > @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> > >                 err = -errno;
> > >
> > >         close(link->fd);
> > > +
> > > +       if (link->legacy.name) {
> > > +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> > > +               free(link->legacy.name);
> > > +       }
> >
> > instead of this check in bpf_link__detach_perf_event, attach_kprobe
> > should install its own bpf_link__detach_kprobe_legacy callback
>
> attach_kprobe_opts() could pass a pointer to link->detach->callback through the
> opts I suppose (or now, the kplink->link->detach->callback). This way the
> default would still be bpf_link__detach_perf_event() but we could create a
> function bpf_link__detach_perf_event_legacy_kprobe() with what was previously
> showed (about kplink freeing). This is not needed with the version showed
> before the [snip] though.
>
> > > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> > > +                                       uint64_t offset, int pid)
> >
> > you are not using offset here, let's pass it into
> > add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
> > %s+123" in poke_kprobe_events? There are separate patches that are
> > adding ability to attach kprobe at offset, so let's support that
> > (internally) from the get go for legacy case as well.
> >
> > also, it's not generic perf_event_open, it's specifically kprobe, so
> > let's call it with kprobe in the name (e.g., kprobe_open_legacy or
> > something)
>
> I'm calling it now perf_event_open_kprobe_legacy() and it calls:
>
> static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
> {
>         return poke_kprobe_events(true, name, retprobe, offset);
> }
>
> and then we set {kprobes/kretprobes}/funcname_pid, also supporting offset:
>
> static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) {
>         int fd, ret = 0;
>         char cmd[192] = {}, probename[128] = {}, probefunc[128] = {};
>         const char *file = "/sys/kernel/debug/tracing/kprobe_events";
>
>         if (retprobe)
>                 ret = snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, getpid());
>         else
>                 ret = snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, getpid());
>         if (offset)
>                 ret = snprintf(probefunc, sizeof(probefunc), "%s+%lu", name, offset);
>         if (ret)
>                 return -EINVAL;
>         if (add) {
>                 snprintf(cmd, sizeof(cmd), "%c:%s %s",
>                                  retprobe ? 'r' : 'p',
>                                  probename,
>                          offset ? probefunc : name);
>         } else {
>                 snprintf(cmd, sizeof(cmd), "-:%s", probename);
>         }
>         fd = open(file, O_WRONLY | O_APPEND, 0);
>         if (!fd)
>                 return -errno;
>         ret = write(fd, cmd, strlen(cmd));
>         if (ret)
>                 ret = -errno;
>         close(fd);
>
>         return ret;
> }
>
> [snip]
>
> > > +               pfd = perf_event_open_probe(false /* uprobe */,
> > > +                                           retprobe, func_name,
> > > +                                            0 /* offset */,
> > > +                                           -1 /* pid */);
> > > +       else
> > > +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> > > +                                           retprobe, func_name,
> > > +                                            0 /* offset */,
> > > +                                           -1 /* pid */);
> > >         if (pfd < 0) {
> > >                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
> > >                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
> >
> > we can't use bpf_program__attach_perf_event as is now, because we need
> > to allocate a different struct bpf_link_kprobe.
>
> We could do the container encapsulation using heap in
> bpf_program_attach_kprobe_opts() or attach_kprobe() like I'm showing here, no ?
>
> ...
>         kplink = calloc(1, sizeof(struct bpf_link_kprobe));
>         if (!kplink)
>                 return libbpf_err_ptr(-ENOMEM);
>         kplink->link = bpf_program__attach_perf_event(prog, pfd);
> ...
>
> and then free all this structure (bpf_link and its encapsulation at the
> detachment, like said previously also). This way we don't have to change
> bpf_program__attach_perf_event() which would continue to serve
> bpf_program__attach_tracepoint() and bpf_program__attach_uprobe() unmodified.
> This way, kprobe would have a container for all cases and uprobe and tracepoint
> could have a container in the future if needed.
>
> > Let's extract the
> > PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
> > and use it from both bpf_program__attach_perf_event and
> > bpf_program__attach_kprobe. It's actually good because we can check
> > silly errors (like prog_fd < 0) before we create perf_event FD now.
>
> Okay, but I'm considering this orthogonal to what you said previously (on
> changing bpf_program__attach_perf_event). UNLESS you really prefer me to do the
> container allocation in bpf_program__attach_perf_event() but then we would have
> to free the container in all detachments (kprobe, tracepoint and uprobe) as it
> couldn't be placed in stack (or it would eventually be lost, no ?).

container will be different for kprobe/uprobe and
tracepoint/perf_event. So allocation has to happen separately from
PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support
  2021-07-20  0:10                       ` Andrii Nakryiko
@ 2021-07-20  4:16                         ` Rafael David Tinoco
  0 siblings, 0 replies; 24+ messages in thread
From: Rafael David Tinoco @ 2021-07-20  4:16 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: bpf

> >         plink = (struct bpf_link *const *) link;
> >         kplink = container_of(plink, struct bpf_link_kprobe, link);
> 
> Did you check if this works? Also how that could ever even work for
> non-kprobe but perf_event-based cases, like tracepoint? And why are we
> even discussing these "alternatives"? What's wrong with the way I
> proposed earlier? For container_of() to work you have to do it the way
> I described in my previous email with one struct being embedded in
> another one.

Nevermind. Sorry for the noise. I'll do the container encapsulation like
you said so. I was being lazy and trying to find a way to change just the
kprobes path instead of thinking in the overall project.

Sorry again, will come up with what you said in the first place. 

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2021-07-20  4:19 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-18  6:25 [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Rafael David Tinoco
2021-03-18 19:31 ` [PATCH] libbpf: allow bpf object kern_version to be overridden Rafael David Tinoco
2021-03-18 19:46   ` Andrii Nakryiko
2021-03-18 20:56     ` Daniel Borkmann
2021-03-19  4:38       ` Rafael David Tinoco
2021-03-19  4:51 ` [RFC][PATCH] libbpf: support kprobe/kretprobe events in legacy environments Andrii Nakryiko
2021-03-22 18:04   ` [PATCH v2 bpf-next][RFC] libbpf: introduce legacy kprobe events support Rafael David Tinoco
2021-03-22 18:25     ` Rafael David Tinoco
2021-03-26 20:50       ` Andrii Nakryiko
2021-04-07  4:49         ` Rafael David Tinoco
2021-04-07 22:33           ` Andrii Nakryiko
2021-04-08 23:59             ` John Fastabend
2021-04-14 14:30             ` Rafael David Tinoco
2021-04-14 20:06               ` Rafael David Tinoco
2021-04-14 23:23               ` Andrii Nakryiko
2021-04-15  5:53                 ` Rafael David Tinoco
2021-04-15 22:48                   ` Andrii Nakryiko
2021-06-25  4:44                 ` [PATCH bpf-next v3] " Rafael David Tinoco
2021-06-25  5:01                   ` Rafael David Tinoco
2021-07-07 13:38                   ` Rafael David Tinoco
2021-07-07 21:52                   ` Andrii Nakryiko
2021-07-19  1:59                     ` Rafael David Tinoco
2021-07-20  0:10                       ` Andrii Nakryiko
2021-07-20  4:16                         ` Rafael David Tinoco

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).