All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
@ 2024-01-30 23:05 Bryce Kahle
  2024-01-31 18:13 ` Alan Maguire
  2024-02-01  0:54 ` Quentin Monnet
  0 siblings, 2 replies; 21+ messages in thread
From: Bryce Kahle @ 2024-01-30 23:05 UTC (permalink / raw)
  To: bpf; +Cc: quentin, ast, daniel, Bryce Kahle

From: Bryce Kahle <bryce.kahle@datadoghq.com>

Enables a user to generate minimized kernel module BTF.

If an eBPF program probes a function within a kernel module or uses
types that come from a kernel module, split BTF is required. The split
module BTF contains only the BTF types that are unique to the module.
It will reference the base/vmlinux BTF types and always starts its type
IDs at X+1 where X is the largest type ID in the base BTF.

Minimization allows a user to ship only the types necessary to do
relocations for the program(s) in the provided eBPF object file(s). A
minimized module BTF will still not contain vmlinux BTF types, so you
should always minimize the vmlinux file first, and then minimize the
kernel module file.

Example:

bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o

v3->v4:
- address style nit about start_id initialization
- rename base to src_base_btf (base_btf is a global var)
- copy src_base_btf so new BTF is not modifying original vmlinux BTF

Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
---
 .../bpf/bpftool/Documentation/bpftool-gen.rst | 18 ++++++++++-
 tools/bpf/bpftool/gen.c                       | 32 +++++++++++++++----
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 5006e724d..e067d3b05 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -16,7 +16,7 @@ SYNOPSIS
 
 	**bpftool** [*OPTIONS*] **gen** *COMMAND*
 
-	*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+	*OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } | { **-L** | **--use-loader** } }
 
 	*COMMAND* := { **object** | **skeleton** | **help** }
 
@@ -202,6 +202,14 @@ OPTIONS
 =======
 	.. include:: common_options.rst
 
+	-B, --base-btf *FILE*
+		  Pass a base BTF object. Base BTF objects are typically used
+		  with BTF objects for kernel modules. To avoid duplicating
+		  all kernel symbols required by modules, BTF objects for
+		  modules are "split", they are built incrementally on top of
+		  the kernel (vmlinux) BTF object. So the base BTF reference
+		  should usually point to the kernel BTF.
+
 	-L, --use-loader
 		  For skeletons, generate a "light" skeleton (also known as "loader"
 		  skeleton). A light skeleton contains a loader eBPF program. It does
@@ -444,3 +452,11 @@ ones given to min_core_btf.
   obj = bpf_object__open_file("one.bpf.o", &opts);
 
   ...
+
+Kernel module BTF may also be minimized by using the -B option:
+
+**$ bpftool -B 5.4.0-smaller.btf gen min_core_btf 5.4.0-module.btf 5.4.0-module-smaller.btf one.bpf.o**
+
+A minimized module BTF will still not contain vmlinux BTF types, so you
+should always minimize the vmlinux file first, and then minimize the
+kernel module file.
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index ee3ce2b80..57691f766 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1630,6 +1630,7 @@ static int do_help(int argc, char **argv)
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
+		"                    {-B|--base-btf} |\n"
 		"                    {-L|--use-loader} }\n"
 		"",
 		bin_name, "gen");
@@ -1695,14 +1696,14 @@ btfgen_new_info(const char *targ_btf_path)
 	if (!info)
 		return NULL;
 
-	info->src_btf = btf__parse(targ_btf_path, NULL);
+	info->src_btf = btf__parse_split(targ_btf_path, base_btf);
 	if (!info->src_btf) {
 		err = -errno;
 		p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
 		goto err_out;
 	}
 
-	info->marked_btf = btf__parse(targ_btf_path, NULL);
+	info->marked_btf = btf__parse_split(targ_btf_path, base_btf);
 	if (!info->marked_btf) {
 		err = -errno;
 		p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
@@ -2139,12 +2140,29 @@ static int btfgen_remap_id(__u32 *type_id, void *ctx)
 /* Generate BTF from relocation information previously recorded */
 static struct btf *btfgen_get_btf(struct btfgen_info *info)
 {
-	struct btf *btf_new = NULL;
+	struct btf *btf_new = NULL, *src_base_btf_new = NULL;
 	unsigned int *ids = NULL;
+	const struct btf *src_base_btf;
 	unsigned int i, n = btf__type_cnt(info->marked_btf);
-	int err = 0;
+	int start_id, err = 0;
+
+	src_base_btf = btf__base_btf(info->src_btf);
+	start_id = src_base_btf ? btf__type_cnt(src_base_btf) : 1;
 
-	btf_new = btf__new_empty();
+	/* clone BTF to sanitize a copy and leave the original intact */
+	if (src_base_btf) {
+		const void *raw_data;
+		__u32 sz;
+
+		raw_data = btf__raw_data(src_base_btf, &sz);
+		src_base_btf_new = btf__new(raw_data, sz);
+		if (!src_base_btf_new) {
+			err = -errno;
+			goto err_out;
+		}
+	}
+
+	btf_new = btf__new_empty_split(src_base_btf_new);
 	if (!btf_new) {
 		err = -errno;
 		goto err_out;
@@ -2157,7 +2175,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
 	}
 
 	/* first pass: add all marked types to btf_new and add their new ids to the ids map */
-	for (i = 1; i < n; i++) {
+	for (i = start_id; i < n; i++) {
 		const struct btf_type *cloned_type, *type;
 		const char *name;
 		int new_id;
@@ -2213,7 +2231,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
 	}
 
 	/* second pass: fix up type ids */
-	for (i = 1; i < btf__type_cnt(btf_new); i++) {
+	for (i = start_id; i < btf__type_cnt(btf_new); i++) {
 		struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i);
 
 		err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-01-30 23:05 [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf Bryce Kahle
@ 2024-01-31 18:13 ` Alan Maguire
  2024-02-02 22:16   ` Andrii Nakryiko
  2024-02-01  0:54 ` Quentin Monnet
  1 sibling, 1 reply; 21+ messages in thread
From: Alan Maguire @ 2024-01-31 18:13 UTC (permalink / raw)
  To: Bryce Kahle, bpf; +Cc: quentin, ast, daniel, Bryce Kahle

On 30/01/2024 23:05, Bryce Kahle wrote:
> From: Bryce Kahle <bryce.kahle@datadoghq.com>
> 
> Enables a user to generate minimized kernel module BTF.
> 
> If an eBPF program probes a function within a kernel module or uses
> types that come from a kernel module, split BTF is required. The split
> module BTF contains only the BTF types that are unique to the module.
> It will reference the base/vmlinux BTF types and always starts its type
> IDs at X+1 where X is the largest type ID in the base BTF.
> 
> Minimization allows a user to ship only the types necessary to do
> relocations for the program(s) in the provided eBPF object file(s). A
> minimized module BTF will still not contain vmlinux BTF types, so you
> should always minimize the vmlinux file first, and then minimize the
> kernel module file.
> 
> Example:
> 
> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o

This is great! I've been working on a somewhat related problem involving
split BTF for modules, and I'm trying to figure out if there's overlap
with what you've done here that can help in either direction. I'll try
and describe what I'm doing. Sorry if this is a bit of a diversion,
but I just want to check if there are potential ways your changes could
facilitate other scenarios in the future.

The problem I'm trying to tackle is to enable split BTF module
generation to be more resilient to underlying kernel BTF changes;
this would allow for example a module that is not built with the kernel
to generate BTF and have it work even if small changes in vmlinux occur.
Even a small change in BTF ids in base BTF is enough to invalidate the
associated split BTF, so the question is how to make this a bit less
brittle. This won't be needed for modules built along with the kernel,
but more for cases like a package delivering a kernel module.

The way this is done is similar to what you're doing - generating
minimal base vmlinux BTF along with the module BTF. In my case however
the minimization is not driven by CO-RE relocations; rather it is driven
by only adding types that are referenced by module BTF and any other
associated types needed. We end up with minimal base BTF that is carried
along with the module BTF (in a .BTF.base_minimal section) and this
minimal BTF will be used to later reconcile module BTF with the running
kernel BTF when the module is loaded; it essentially provides the
additional information needed to map to current vmlinux types.

In this approach, minimal vmlinux BTF is generated via an additional
option to pahole which adds an extra phase to BTF deduplication between
module and kernel. Once we have found the candidate mappings for
deduplication, we can look at all base BTF references from module BTF
and recursively add associated types to the base minimal BTF. Finally we
reparent the split BTF to this minimal base BTF. Experiments show most
modules wind up with base minimal BTF of around 4000 types, so the
minimization seems to work well. But it's complex.

So what I've been trying to work out is if this dedup complexity can be
eliminated with your changes, but from what I can see, the membership in
the minimal base BTF in your case is driven by the CO-RE relocations
used in the BPF program. Would there do you think be a future where we
would look at doing base minimal BTF generation by other criteria (like
references from the module BTF)? Thanks!

Alan

> v3->v4:
> - address style nit about start_id initialization
> - rename base to src_base_btf (base_btf is a global var)
> - copy src_base_btf so new BTF is not modifying original vmlinux BTF
> 
> Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
> ---
>  .../bpf/bpftool/Documentation/bpftool-gen.rst | 18 ++++++++++-
>  tools/bpf/bpftool/gen.c                       | 32 +++++++++++++++----
>  2 files changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> index 5006e724d..e067d3b05 100644
> --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> @@ -16,7 +16,7 @@ SYNOPSIS
>  
>  	**bpftool** [*OPTIONS*] **gen** *COMMAND*
>  
> -	*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
> +	*OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } | { **-L** | **--use-loader** } }
>  
>  	*COMMAND* := { **object** | **skeleton** | **help** }
>  
> @@ -202,6 +202,14 @@ OPTIONS
>  =======
>  	.. include:: common_options.rst
>  
> +	-B, --base-btf *FILE*
> +		  Pass a base BTF object. Base BTF objects are typically used
> +		  with BTF objects for kernel modules. To avoid duplicating
> +		  all kernel symbols required by modules, BTF objects for
> +		  modules are "split", they are built incrementally on top of
> +		  the kernel (vmlinux) BTF object. So the base BTF reference
> +		  should usually point to the kernel BTF.
> +
>  	-L, --use-loader
>  		  For skeletons, generate a "light" skeleton (also known as "loader"
>  		  skeleton). A light skeleton contains a loader eBPF program. It does
> @@ -444,3 +452,11 @@ ones given to min_core_btf.
>    obj = bpf_object__open_file("one.bpf.o", &opts);
>  
>    ...
> +
> +Kernel module BTF may also be minimized by using the -B option:
> +
> +**$ bpftool -B 5.4.0-smaller.btf gen min_core_btf 5.4.0-module.btf 5.4.0-module-smaller.btf one.bpf.o**
> +
> +A minimized module BTF will still not contain vmlinux BTF types, so you
> +should always minimize the vmlinux file first, and then minimize the
> +kernel module file.
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> index ee3ce2b80..57691f766 100644
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
> @@ -1630,6 +1630,7 @@ static int do_help(int argc, char **argv)
>  		"       %1$s %2$s help\n"
>  		"\n"
>  		"       " HELP_SPEC_OPTIONS " |\n"
> +		"                    {-B|--base-btf} |\n"
>  		"                    {-L|--use-loader} }\n"
>  		"",
>  		bin_name, "gen");
> @@ -1695,14 +1696,14 @@ btfgen_new_info(const char *targ_btf_path)
>  	if (!info)
>  		return NULL;
>  
> -	info->src_btf = btf__parse(targ_btf_path, NULL);
> +	info->src_btf = btf__parse_split(targ_btf_path, base_btf);
>  	if (!info->src_btf) {
>  		err = -errno;
>  		p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
>  		goto err_out;
>  	}
>  
> -	info->marked_btf = btf__parse(targ_btf_path, NULL);
> +	info->marked_btf = btf__parse_split(targ_btf_path, base_btf);
>  	if (!info->marked_btf) {
>  		err = -errno;
>  		p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
> @@ -2139,12 +2140,29 @@ static int btfgen_remap_id(__u32 *type_id, void *ctx)
>  /* Generate BTF from relocation information previously recorded */
>  static struct btf *btfgen_get_btf(struct btfgen_info *info)
>  {
> -	struct btf *btf_new = NULL;
> +	struct btf *btf_new = NULL, *src_base_btf_new = NULL;
>  	unsigned int *ids = NULL;
> +	const struct btf *src_base_btf;
>  	unsigned int i, n = btf__type_cnt(info->marked_btf);
> -	int err = 0;
> +	int start_id, err = 0;
> +
> +	src_base_btf = btf__base_btf(info->src_btf);
> +	start_id = src_base_btf ? btf__type_cnt(src_base_btf) : 1;
>  
> -	btf_new = btf__new_empty();
> +	/* clone BTF to sanitize a copy and leave the original intact */
> +	if (src_base_btf) {
> +		const void *raw_data;
> +		__u32 sz;
> +
> +		raw_data = btf__raw_data(src_base_btf, &sz);
> +		src_base_btf_new = btf__new(raw_data, sz);
> +		if (!src_base_btf_new) {
> +			err = -errno;
> +			goto err_out;
> +		}
> +	}
> +
> +	btf_new = btf__new_empty_split(src_base_btf_new);
>  	if (!btf_new) {
>  		err = -errno;
>  		goto err_out;
> @@ -2157,7 +2175,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
>  	}
>  
>  	/* first pass: add all marked types to btf_new and add their new ids to the ids map */
> -	for (i = 1; i < n; i++) {
> +	for (i = start_id; i < n; i++) {
>  		const struct btf_type *cloned_type, *type;
>  		const char *name;
>  		int new_id;
> @@ -2213,7 +2231,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
>  	}
>  
>  	/* second pass: fix up type ids */
> -	for (i = 1; i < btf__type_cnt(btf_new); i++) {
> +	for (i = start_id; i < btf__type_cnt(btf_new); i++) {
>  		struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i);
>  
>  		err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-01-30 23:05 [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf Bryce Kahle
  2024-01-31 18:13 ` Alan Maguire
@ 2024-02-01  0:54 ` Quentin Monnet
  2024-02-01 21:05   ` Bryce Kahle
  1 sibling, 1 reply; 21+ messages in thread
From: Quentin Monnet @ 2024-02-01  0:54 UTC (permalink / raw)
  To: Bryce Kahle, bpf; +Cc: ast, daniel, Bryce Kahle

2024-01-30 23:05 UTC+0000 ~ Bryce Kahle <git@brycekahle.com>
> From: Bryce Kahle <bryce.kahle@datadoghq.com>
> 
> Enables a user to generate minimized kernel module BTF.
> 
> If an eBPF program probes a function within a kernel module or uses
> types that come from a kernel module, split BTF is required. The split
> module BTF contains only the BTF types that are unique to the module.
> It will reference the base/vmlinux BTF types and always starts its type
> IDs at X+1 where X is the largest type ID in the base BTF.
> 
> Minimization allows a user to ship only the types necessary to do
> relocations for the program(s) in the provided eBPF object file(s). A
> minimized module BTF will still not contain vmlinux BTF types, so you
> should always minimize the vmlinux file first, and then minimize the
> kernel module file.
> 
> Example:
> 
> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
> 
> v3->v4:
> - address style nit about start_id initialization
> - rename base to src_base_btf (base_btf is a global var)
> - copy src_base_btf so new BTF is not modifying original vmlinux BTF
> 
> Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>

Looks good, thank you!

Reviewed-by: Quentin Monnet <quentin@isovalent.com>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-01  0:54 ` Quentin Monnet
@ 2024-02-01 21:05   ` Bryce Kahle
  2024-02-02 22:10     ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Bryce Kahle @ 2024-02-01 21:05 UTC (permalink / raw)
  To: Quentin Monnet; +Cc: Bryce Kahle, bpf, ast, daniel

I have discovered an issue with this approach. If you minimize the
vmlinux file first, then it can remove/renumber types that are
referenced from the module BTF. This causes bpftool to fail to parse
the module BTF file. Likewise, if you minimize the vmlinux second,
then the minimized module BTF will reference invalid vmlinux type IDs
because they have been renumbered/removed.

We essentially need to minimize all the modules and the vmlinux at the
same time.


On Wed, Jan 31, 2024 at 4:54 PM Quentin Monnet <quentin@isovalent.com> wrote:
>
> 2024-01-30 23:05 UTC+0000 ~ Bryce Kahle <git@brycekahle.com>
> > From: Bryce Kahle <bryce.kahle@datadoghq.com>
> >
> > Enables a user to generate minimized kernel module BTF.
> >
> > If an eBPF program probes a function within a kernel module or uses
> > types that come from a kernel module, split BTF is required. The split
> > module BTF contains only the BTF types that are unique to the module.
> > It will reference the base/vmlinux BTF types and always starts its type
> > IDs at X+1 where X is the largest type ID in the base BTF.
> >
> > Minimization allows a user to ship only the types necessary to do
> > relocations for the program(s) in the provided eBPF object file(s). A
> > minimized module BTF will still not contain vmlinux BTF types, so you
> > should always minimize the vmlinux file first, and then minimize the
> > kernel module file.
> >
> > Example:
> >
> > bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> > bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
> >
> > v3->v4:
> > - address style nit about start_id initialization
> > - rename base to src_base_btf (base_btf is a global var)
> > - copy src_base_btf so new BTF is not modifying original vmlinux BTF
> >
> > Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
>
> Looks good, thank you!
>
> Reviewed-by: Quentin Monnet <quentin@isovalent.com>
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-01 21:05   ` Bryce Kahle
@ 2024-02-02 22:10     ` Andrii Nakryiko
  2024-02-03  0:58       ` Bryce Kahle
  0 siblings, 1 reply; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-02 22:10 UTC (permalink / raw)
  To: Bryce Kahle; +Cc: Quentin Monnet, Bryce Kahle, bpf, ast, daniel

On Thu, Feb 1, 2024 at 1:05 PM Bryce Kahle <bryce.kahle@datadoghq.com> wrote:
>
> I have discovered an issue with this approach. If you minimize the
> vmlinux file first, then it can remove/renumber types that are
> referenced from the module BTF. This causes bpftool to fail to parse
> the module BTF file. Likewise, if you minimize the vmlinux second,
> then the minimized module BTF will reference invalid vmlinux type IDs
> because they have been renumbered/removed.
>
> We essentially need to minimize all the modules and the vmlinux at the
> same time.

Maybe the right solution is to concat vmlinux and all the relevant
module BTFs first, dedup it again, then minimize against that "super
BTF". But yes, you'd have to specify both vmlinux and all the module
BTFs at the same time (which bpftool allows you to do easily with its
CLI interface, so not really a problem)

>
>
> On Wed, Jan 31, 2024 at 4:54 PM Quentin Monnet <quentin@isovalent.com> wrote:
> >
> > 2024-01-30 23:05 UTC+0000 ~ Bryce Kahle <git@brycekahle.com>
> > > From: Bryce Kahle <bryce.kahle@datadoghq.com>
> > >
> > > Enables a user to generate minimized kernel module BTF.
> > >
> > > If an eBPF program probes a function within a kernel module or uses
> > > types that come from a kernel module, split BTF is required. The split
> > > module BTF contains only the BTF types that are unique to the module.
> > > It will reference the base/vmlinux BTF types and always starts its type
> > > IDs at X+1 where X is the largest type ID in the base BTF.
> > >
> > > Minimization allows a user to ship only the types necessary to do
> > > relocations for the program(s) in the provided eBPF object file(s). A
> > > minimized module BTF will still not contain vmlinux BTF types, so you
> > > should always minimize the vmlinux file first, and then minimize the
> > > kernel module file.
> > >
> > > Example:
> > >
> > > bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> > > bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
> > >
> > > v3->v4:
> > > - address style nit about start_id initialization
> > > - rename base to src_base_btf (base_btf is a global var)
> > > - copy src_base_btf so new BTF is not modifying original vmlinux BTF
> > >
> > > Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
> >
> > Looks good, thank you!
> >
> > Reviewed-by: Quentin Monnet <quentin@isovalent.com>
> >
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-01-31 18:13 ` Alan Maguire
@ 2024-02-02 22:16   ` Andrii Nakryiko
  2024-02-06 10:59     ` Alan Maguire
  0 siblings, 1 reply; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-02 22:16 UTC (permalink / raw)
  To: Alan Maguire; +Cc: Bryce Kahle, bpf, quentin, ast, daniel, Bryce Kahle

On Wed, Jan 31, 2024 at 10:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 30/01/2024 23:05, Bryce Kahle wrote:
> > From: Bryce Kahle <bryce.kahle@datadoghq.com>
> >
> > Enables a user to generate minimized kernel module BTF.
> >
> > If an eBPF program probes a function within a kernel module or uses
> > types that come from a kernel module, split BTF is required. The split
> > module BTF contains only the BTF types that are unique to the module.
> > It will reference the base/vmlinux BTF types and always starts its type
> > IDs at X+1 where X is the largest type ID in the base BTF.
> >
> > Minimization allows a user to ship only the types necessary to do
> > relocations for the program(s) in the provided eBPF object file(s). A
> > minimized module BTF will still not contain vmlinux BTF types, so you
> > should always minimize the vmlinux file first, and then minimize the
> > kernel module file.
> >
> > Example:
> >
> > bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> > bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
>
> This is great! I've been working on a somewhat related problem involving
> split BTF for modules, and I'm trying to figure out if there's overlap
> with what you've done here that can help in either direction. I'll try
> and describe what I'm doing. Sorry if this is a bit of a diversion,
> but I just want to check if there are potential ways your changes could
> facilitate other scenarios in the future.
>
> The problem I'm trying to tackle is to enable split BTF module
> generation to be more resilient to underlying kernel BTF changes;
> this would allow for example a module that is not built with the kernel
> to generate BTF and have it work even if small changes in vmlinux occur.
> Even a small change in BTF ids in base BTF is enough to invalidate the
> associated split BTF, so the question is how to make this a bit less
> brittle. This won't be needed for modules built along with the kernel,
> but more for cases like a package delivering a kernel module.
>
> The way this is done is similar to what you're doing - generating
> minimal base vmlinux BTF along with the module BTF. In my case however
> the minimization is not driven by CO-RE relocations; rather it is driven
> by only adding types that are referenced by module BTF and any other
> associated types needed. We end up with minimal base BTF that is carried
> along with the module BTF (in a .BTF.base_minimal section) and this
> minimal BTF will be used to later reconcile module BTF with the running
> kernel BTF when the module is loaded; it essentially provides the
> additional information needed to map to current vmlinux types.
>
> In this approach, minimal vmlinux BTF is generated via an additional
> option to pahole which adds an extra phase to BTF deduplication between
> module and kernel. Once we have found the candidate mappings for
> deduplication, we can look at all base BTF references from module BTF
> and recursively add associated types to the base minimal BTF. Finally we
> reparent the split BTF to this minimal base BTF. Experiments show most
> modules wind up with base minimal BTF of around 4000 types, so the
> minimization seems to work well. But it's complex.
>
> So what I've been trying to work out is if this dedup complexity can be
> eliminated with your changes, but from what I can see, the membership in
> the minimal base BTF in your case is driven by the CO-RE relocations
> used in the BPF program. Would there do you think be a future where we
> would look at doing base minimal BTF generation by other criteria (like
> references from the module BTF)? Thanks!

Hm... I might be misremembering or missing something, but the problem
you are solving doesn't seem to be related to BTF minimization. I also
forgot why you need BTF deduplication, I vaguely remember we needed to
remember "expectations" of types that module BTF references in vmlinux
BTF, but I fail to remember why we needed dedup... Perhaps we need a
BPF office hours session to go over details again?

>
> Alan
>
> > v3->v4:
> > - address style nit about start_id initialization
> > - rename base to src_base_btf (base_btf is a global var)
> > - copy src_base_btf so new BTF is not modifying original vmlinux BTF
> >
> > Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
> > ---
> >  .../bpf/bpftool/Documentation/bpftool-gen.rst | 18 ++++++++++-
> >  tools/bpf/bpftool/gen.c                       | 32 +++++++++++++++----
> >  2 files changed, 42 insertions(+), 8 deletions(-)
> >
> > diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> > index 5006e724d..e067d3b05 100644
> > --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> > +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
> > @@ -16,7 +16,7 @@ SYNOPSIS
> >
> >       **bpftool** [*OPTIONS*] **gen** *COMMAND*
> >
> > -     *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
> > +     *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } | { **-L** | **--use-loader** } }
> >
> >       *COMMAND* := { **object** | **skeleton** | **help** }
> >
> > @@ -202,6 +202,14 @@ OPTIONS
> >  =======
> >       .. include:: common_options.rst
> >
> > +     -B, --base-btf *FILE*
> > +               Pass a base BTF object. Base BTF objects are typically used
> > +               with BTF objects for kernel modules. To avoid duplicating
> > +               all kernel symbols required by modules, BTF objects for
> > +               modules are "split", they are built incrementally on top of
> > +               the kernel (vmlinux) BTF object. So the base BTF reference
> > +               should usually point to the kernel BTF.
> > +
> >       -L, --use-loader
> >                 For skeletons, generate a "light" skeleton (also known as "loader"
> >                 skeleton). A light skeleton contains a loader eBPF program. It does
> > @@ -444,3 +452,11 @@ ones given to min_core_btf.
> >    obj = bpf_object__open_file("one.bpf.o", &opts);
> >
> >    ...
> > +
> > +Kernel module BTF may also be minimized by using the -B option:
> > +
> > +**$ bpftool -B 5.4.0-smaller.btf gen min_core_btf 5.4.0-module.btf 5.4.0-module-smaller.btf one.bpf.o**
> > +
> > +A minimized module BTF will still not contain vmlinux BTF types, so you
> > +should always minimize the vmlinux file first, and then minimize the
> > +kernel module file.
> > diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> > index ee3ce2b80..57691f766 100644
> > --- a/tools/bpf/bpftool/gen.c
> > +++ b/tools/bpf/bpftool/gen.c
> > @@ -1630,6 +1630,7 @@ static int do_help(int argc, char **argv)
> >               "       %1$s %2$s help\n"
> >               "\n"
> >               "       " HELP_SPEC_OPTIONS " |\n"
> > +             "                    {-B|--base-btf} |\n"
> >               "                    {-L|--use-loader} }\n"
> >               "",
> >               bin_name, "gen");
> > @@ -1695,14 +1696,14 @@ btfgen_new_info(const char *targ_btf_path)
> >       if (!info)
> >               return NULL;
> >
> > -     info->src_btf = btf__parse(targ_btf_path, NULL);
> > +     info->src_btf = btf__parse_split(targ_btf_path, base_btf);
> >       if (!info->src_btf) {
> >               err = -errno;
> >               p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
> >               goto err_out;
> >       }
> >
> > -     info->marked_btf = btf__parse(targ_btf_path, NULL);
> > +     info->marked_btf = btf__parse_split(targ_btf_path, base_btf);
> >       if (!info->marked_btf) {
> >               err = -errno;
> >               p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno));
> > @@ -2139,12 +2140,29 @@ static int btfgen_remap_id(__u32 *type_id, void *ctx)
> >  /* Generate BTF from relocation information previously recorded */
> >  static struct btf *btfgen_get_btf(struct btfgen_info *info)
> >  {
> > -     struct btf *btf_new = NULL;
> > +     struct btf *btf_new = NULL, *src_base_btf_new = NULL;
> >       unsigned int *ids = NULL;
> > +     const struct btf *src_base_btf;
> >       unsigned int i, n = btf__type_cnt(info->marked_btf);
> > -     int err = 0;
> > +     int start_id, err = 0;
> > +
> > +     src_base_btf = btf__base_btf(info->src_btf);
> > +     start_id = src_base_btf ? btf__type_cnt(src_base_btf) : 1;
> >
> > -     btf_new = btf__new_empty();
> > +     /* clone BTF to sanitize a copy and leave the original intact */
> > +     if (src_base_btf) {
> > +             const void *raw_data;
> > +             __u32 sz;
> > +
> > +             raw_data = btf__raw_data(src_base_btf, &sz);
> > +             src_base_btf_new = btf__new(raw_data, sz);
> > +             if (!src_base_btf_new) {
> > +                     err = -errno;
> > +                     goto err_out;
> > +             }
> > +     }
> > +
> > +     btf_new = btf__new_empty_split(src_base_btf_new);
> >       if (!btf_new) {
> >               err = -errno;
> >               goto err_out;
> > @@ -2157,7 +2175,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
> >       }
> >
> >       /* first pass: add all marked types to btf_new and add their new ids to the ids map */
> > -     for (i = 1; i < n; i++) {
> > +     for (i = start_id; i < n; i++) {
> >               const struct btf_type *cloned_type, *type;
> >               const char *name;
> >               int new_id;
> > @@ -2213,7 +2231,7 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info)
> >       }
> >
> >       /* second pass: fix up type ids */
> > -     for (i = 1; i < btf__type_cnt(btf_new); i++) {
> > +     for (i = start_id; i < btf__type_cnt(btf_new); i++) {
> >               struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i);
> >
> >               err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids);
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-02 22:10     ` Andrii Nakryiko
@ 2024-02-03  0:58       ` Bryce Kahle
  2024-02-05 18:21         ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Bryce Kahle @ 2024-02-03  0:58 UTC (permalink / raw)
  To: Andrii Nakryiko, Bryce Kahle; +Cc: Quentin Monnet, bpf, ast, daniel

On Fri, Feb 2, 2024, at 2:10 PM, Andrii Nakryiko wrote:
> 
> Maybe the right solution is to concat vmlinux and all the relevant
> module BTFs first, dedup it again, then minimize against that "super
> BTF". But yes, you'd have to specify both vmlinux and all the module
> BTFs at the same time (which bpftool allows you to do easily with its
> CLI interface, so not really a problem)
> 

How would you handle the Type ID conflicts between the modules, since they all start at vmlinux+1? Is there a danger of conflicting type names, where there are two types with the same name but different layouts?

I was trying to mirror the sysfs file layout, so a loader didn't have different behavior between user-supplied BTF and kernel-provided BTF.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-03  0:58       ` Bryce Kahle
@ 2024-02-05 18:21         ` Andrii Nakryiko
  2024-02-07 18:51           ` Bryce Kahle
  2024-02-26 21:48           ` Bryce Kahle
  0 siblings, 2 replies; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-05 18:21 UTC (permalink / raw)
  To: Bryce Kahle; +Cc: Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On Fri, Feb 2, 2024 at 4:58 PM Bryce Kahle <git@brycekahle.com> wrote:
>
> On Fri, Feb 2, 2024, at 2:10 PM, Andrii Nakryiko wrote:
> >
> > Maybe the right solution is to concat vmlinux and all the relevant
> > module BTFs first, dedup it again, then minimize against that "super
> > BTF". But yes, you'd have to specify both vmlinux and all the module
> > BTFs at the same time (which bpftool allows you to do easily with its
> > CLI interface, so not really a problem)
> >
>
> How would you handle the Type ID conflicts between the modules, since they all start at vmlinux+1? Is there a danger of conflicting type names, where there are two types with the same name but different layouts?

The sequence would be:

1) create BTF instance from vmlinux BTF
2) append each module BTF to that instance (we have btf__add_btf() API
already for that). This will rewrite type IDs for each module
(shifting them by some constant number)
3) btf__dedup() will deduplicate everything, so that only unique type
definitions remain.

>
> I was trying to mirror the sysfs file layout, so a loader didn't have different behavior between user-supplied BTF and kernel-provided BTF.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-02 22:16   ` Andrii Nakryiko
@ 2024-02-06 10:59     ` Alan Maguire
  2024-02-08  0:26       ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Alan Maguire @ 2024-02-06 10:59 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Bryce Kahle, bpf, quentin, ast, daniel, Bryce Kahle

On 02/02/2024 22:16, Andrii Nakryiko wrote:
> On Wed, Jan 31, 2024 at 10:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> On 30/01/2024 23:05, Bryce Kahle wrote:
>>> From: Bryce Kahle <bryce.kahle@datadoghq.com>
>>>
>>> Enables a user to generate minimized kernel module BTF.
>>>
>>> If an eBPF program probes a function within a kernel module or uses
>>> types that come from a kernel module, split BTF is required. The split
>>> module BTF contains only the BTF types that are unique to the module.
>>> It will reference the base/vmlinux BTF types and always starts its type
>>> IDs at X+1 where X is the largest type ID in the base BTF.
>>>
>>> Minimization allows a user to ship only the types necessary to do
>>> relocations for the program(s) in the provided eBPF object file(s). A
>>> minimized module BTF will still not contain vmlinux BTF types, so you
>>> should always minimize the vmlinux file first, and then minimize the
>>> kernel module file.
>>>
>>> Example:
>>>
>>> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
>>> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
>>
>> This is great! I've been working on a somewhat related problem involving
>> split BTF for modules, and I'm trying to figure out if there's overlap
>> with what you've done here that can help in either direction. I'll try
>> and describe what I'm doing. Sorry if this is a bit of a diversion,
>> but I just want to check if there are potential ways your changes could
>> facilitate other scenarios in the future.
>>
>> The problem I'm trying to tackle is to enable split BTF module
>> generation to be more resilient to underlying kernel BTF changes;
>> this would allow for example a module that is not built with the kernel
>> to generate BTF and have it work even if small changes in vmlinux occur.
>> Even a small change in BTF ids in base BTF is enough to invalidate the
>> associated split BTF, so the question is how to make this a bit less
>> brittle. This won't be needed for modules built along with the kernel,
>> but more for cases like a package delivering a kernel module.
>>
>> The way this is done is similar to what you're doing - generating
>> minimal base vmlinux BTF along with the module BTF. In my case however
>> the minimization is not driven by CO-RE relocations; rather it is driven
>> by only adding types that are referenced by module BTF and any other
>> associated types needed. We end up with minimal base BTF that is carried
>> along with the module BTF (in a .BTF.base_minimal section) and this
>> minimal BTF will be used to later reconcile module BTF with the running
>> kernel BTF when the module is loaded; it essentially provides the
>> additional information needed to map to current vmlinux types.
>>
>> In this approach, minimal vmlinux BTF is generated via an additional
>> option to pahole which adds an extra phase to BTF deduplication between
>> module and kernel. Once we have found the candidate mappings for
>> deduplication, we can look at all base BTF references from module BTF
>> and recursively add associated types to the base minimal BTF. Finally we
>> reparent the split BTF to this minimal base BTF. Experiments show most
>> modules wind up with base minimal BTF of around 4000 types, so the
>> minimization seems to work well. But it's complex.
>>
>> So what I've been trying to work out is if this dedup complexity can be
>> eliminated with your changes, but from what I can see, the membership in
>> the minimal base BTF in your case is driven by the CO-RE relocations
>> used in the BPF program. Would there do you think be a future where we
>> would look at doing base minimal BTF generation by other criteria (like
>> references from the module BTF)? Thanks!
> 
> Hm... I might be misremembering or missing something, but the problem
> you are solving doesn't seem to be related to BTF minimization. I also
> forgot why you need BTF deduplication, I vaguely remember we needed to
> remember "expectations" of types that module BTF references in vmlinux
> BTF, but I fail to remember why we needed dedup... Perhaps we need a
> BPF office hours session to go over details again?
>

Yeah, that would be great! I've put

Making split BTF more resilient

..on the agenda for 02-15.

The reason BTF minimization comes into the picture is this - the
expectations split BTF can have of base BTF can be quite complex, and in
figuring out ways to represent them, it occurred that BTF itself - in
the form of the minimal BTF needed to represent those split BTF
references - made sense. Consider cases like a split BTF struct that
contains a base BTF struct embedded in it. If we have a minimal base BTF
which contains such needed base types, we are in a position to use it to
later reconcile the base BTF worlds at encoding time and use time (for
example vmlinux BTF at module build time versus current vmlinux BTF).

Further, a natural time to construct that minimal base BTF presents
itself when we do deduplication between split and base BTF.  The phase
after we have mapped split types to canonical types is the ideal time to
handle this; the algorithm is basically

- foreach reference from split -> base BTF
 - add it to base minimal BTF
This is controlled by a new dedup option - gen_base_btf_minimal - which
would be enabled via  a ---btf_features option to pahole for users who
wanted to generate minimal base BTF. pahole places the new minimized
base BTF in .BTF.base_minimal section, with the split BTF referring to
it in the usual .BTF section. Later this base minimal BTF is used to
reconcile the split BTF expectations with current base BTF.

The kinds of minimizations I see are pretty reasonable for kernel
modules; I tried a number of in-tree modules (which wouldn't use this
feature in practice, just wanted to have something to test with), and
around 4000 types were observed in base minimal BTF.

It's possible we could adapt this minimization process to be guided
by CO-RE relocations (rather than split->base BTF references), if that
would help Bryce's case.

Alan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-05 18:21         ` Andrii Nakryiko
@ 2024-02-07 18:51           ` Bryce Kahle
  2024-02-07 22:38             ` Yonghong Song
  2024-02-26 21:48           ` Bryce Kahle
  1 sibling, 1 reply; 21+ messages in thread
From: Bryce Kahle @ 2024-02-07 18:51 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> 3) btf__dedup() will deduplicate everything, so that only unique type
> definitions remain.
>

Since minimization only keeps used struct and union members, couldn't
you have two internal types from different modules which conflict and
end up using the wrong offset?

Example:
in module M:
struct S {
... // other unused members
int x; // offset 12 (for example)
}

in module N:
struct S {
... // other unused members
int x; // offset 20 (something different from S.x in module M)
}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-07 18:51           ` Bryce Kahle
@ 2024-02-07 22:38             ` Yonghong Song
  2024-02-08  0:30               ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Yonghong Song @ 2024-02-07 22:38 UTC (permalink / raw)
  To: Bryce Kahle, Andrii Nakryiko
  Cc: Bryce Kahle, Quentin Monnet, bpf, ast, daniel


On 2/7/24 10:51 AM, Bryce Kahle wrote:
> On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>> 3) btf__dedup() will deduplicate everything, so that only unique type
>> definitions remain.
>>
A random thought about another way.
At module side, we keep
   - module btf
   - another section (e.g. .BTF.extra) to keep minimum kernel-side
     types which directly used by module btf

   for example, module btf has
     struct foo {
       struct task_struct *t;
     }
     module btf encoding will have id, say 20,
     for 'struct task_struct' which is at that time
     the id in linux kernel.
   Then the module .BTF.extra contains
     id 20: struct task_struct type encoding
     there is no need to encode more types beyond pointers.
     this can be simpler or more complex depending
     on what to do during module load.

When a module load:
   For each .BTF.extra entry, trying to match
   the corresponding types in the current kernel.
   The type in the current type should have same
   size as the one in .BTF.extra if otherwise
   layout in the module btf may change.

   If new kernel type can be used for module BTF,
   simply replace the old id with new id in module BTF.

   Otherwise, type mismatch may happen and the corresponding
   module btf type should be invalidated.

> Since minimization only keeps used struct and union members, couldn't
> you have two internal types from different modules which conflict and
> end up using the wrong offset?
>
> Example:
> in module M:
> struct S {
> ... // other unused members
> int x; // offset 12 (for example)
> }
>
> in module N:
> struct S {
> ... // other unused members
> int x; // offset 20 (something different from S.x in module M)
> }
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-06 10:59     ` Alan Maguire
@ 2024-02-08  0:26       ` Andrii Nakryiko
  2024-02-08 22:45         ` Alan Maguire
  0 siblings, 1 reply; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-08  0:26 UTC (permalink / raw)
  To: Alan Maguire; +Cc: Bryce Kahle, bpf, quentin, ast, daniel, Bryce Kahle

On Tue, Feb 6, 2024 at 2:59 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 02/02/2024 22:16, Andrii Nakryiko wrote:
> > On Wed, Jan 31, 2024 at 10:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> On 30/01/2024 23:05, Bryce Kahle wrote:
> >>> From: Bryce Kahle <bryce.kahle@datadoghq.com>
> >>>
> >>> Enables a user to generate minimized kernel module BTF.
> >>>
> >>> If an eBPF program probes a function within a kernel module or uses
> >>> types that come from a kernel module, split BTF is required. The split
> >>> module BTF contains only the BTF types that are unique to the module.
> >>> It will reference the base/vmlinux BTF types and always starts its type
> >>> IDs at X+1 where X is the largest type ID in the base BTF.
> >>>
> >>> Minimization allows a user to ship only the types necessary to do
> >>> relocations for the program(s) in the provided eBPF object file(s). A
> >>> minimized module BTF will still not contain vmlinux BTF types, so you
> >>> should always minimize the vmlinux file first, and then minimize the
> >>> kernel module file.
> >>>
> >>> Example:
> >>>
> >>> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> >>> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
> >>
> >> This is great! I've been working on a somewhat related problem involving
> >> split BTF for modules, and I'm trying to figure out if there's overlap
> >> with what you've done here that can help in either direction. I'll try
> >> and describe what I'm doing. Sorry if this is a bit of a diversion,
> >> but I just want to check if there are potential ways your changes could
> >> facilitate other scenarios in the future.
> >>
> >> The problem I'm trying to tackle is to enable split BTF module
> >> generation to be more resilient to underlying kernel BTF changes;
> >> this would allow for example a module that is not built with the kernel
> >> to generate BTF and have it work even if small changes in vmlinux occur.
> >> Even a small change in BTF ids in base BTF is enough to invalidate the
> >> associated split BTF, so the question is how to make this a bit less
> >> brittle. This won't be needed for modules built along with the kernel,
> >> but more for cases like a package delivering a kernel module.
> >>
> >> The way this is done is similar to what you're doing - generating
> >> minimal base vmlinux BTF along with the module BTF. In my case however
> >> the minimization is not driven by CO-RE relocations; rather it is driven
> >> by only adding types that are referenced by module BTF and any other
> >> associated types needed. We end up with minimal base BTF that is carried
> >> along with the module BTF (in a .BTF.base_minimal section) and this
> >> minimal BTF will be used to later reconcile module BTF with the running
> >> kernel BTF when the module is loaded; it essentially provides the
> >> additional information needed to map to current vmlinux types.
> >>
> >> In this approach, minimal vmlinux BTF is generated via an additional
> >> option to pahole which adds an extra phase to BTF deduplication between
> >> module and kernel. Once we have found the candidate mappings for
> >> deduplication, we can look at all base BTF references from module BTF
> >> and recursively add associated types to the base minimal BTF. Finally we
> >> reparent the split BTF to this minimal base BTF. Experiments show most
> >> modules wind up with base minimal BTF of around 4000 types, so the
> >> minimization seems to work well. But it's complex.
> >>
> >> So what I've been trying to work out is if this dedup complexity can be
> >> eliminated with your changes, but from what I can see, the membership in
> >> the minimal base BTF in your case is driven by the CO-RE relocations
> >> used in the BPF program. Would there do you think be a future where we
> >> would look at doing base minimal BTF generation by other criteria (like
> >> references from the module BTF)? Thanks!
> >
> > Hm... I might be misremembering or missing something, but the problem
> > you are solving doesn't seem to be related to BTF minimization. I also
> > forgot why you need BTF deduplication, I vaguely remember we needed to
> > remember "expectations" of types that module BTF references in vmlinux
> > BTF, but I fail to remember why we needed dedup... Perhaps we need a
> > BPF office hours session to go over details again?
> >
>
> Yeah, that would be great! I've put
>
> Making split BTF more resilient
>
> ..on the agenda for 02-15.
>
> The reason BTF minimization comes into the picture is this - the
> expectations split BTF can have of base BTF can be quite complex, and in
> figuring out ways to represent them, it occurred that BTF itself - in
> the form of the minimal BTF needed to represent those split BTF
> references - made sense. Consider cases like a split BTF struct that
> contains a base BTF struct embedded in it. If we have a minimal base BTF
> which contains such needed base types, we are in a position to use it to
> later reconcile the base BTF worlds at encoding time and use time (for
> example vmlinux BTF at module build time versus current vmlinux BTF).
>
> Further, a natural time to construct that minimal base BTF presents
> itself when we do deduplication between split and base BTF.  The phase
> after we have mapped split types to canonical types is the ideal time to
> handle this; the algorithm is basically
>
> - foreach reference from split -> base BTF
>  - add it to base minimal BTF
> This is controlled by a new dedup option - gen_base_btf_minimal - which
> would be enabled via  a ---btf_features option to pahole for users who
> wanted to generate minimal base BTF. pahole places the new minimized
> base BTF in .BTF.base_minimal section, with the split BTF referring to
> it in the usual .BTF section. Later this base minimal BTF is used to
> reconcile the split BTF expectations with current base BTF.
>
> The kinds of minimizations I see are pretty reasonable for kernel
> modules; I tried a number of in-tree modules (which wouldn't use this
> feature in practice, just wanted to have something to test with), and
> around 4000 types were observed in base minimal BTF.
>
> It's possible we could adapt this minimization process to be guided
> by CO-RE relocations (rather than split->base BTF references), if that
> would help Bryce's case.

I think this minimization idea is overcomplicating anything. First, we
don't have CO-RE relocations, and from BTF alone we don't know what
fields of base BTF structs module is referencing (that may or may not
be in DWARF). So I don't think there is anything to minimize.

On the other hand, it seems reasonable to record a few basic things
about base BTF type expectations:
  - name
  - size and whether that size has to be exact. This would be
determined if base BTF type is ever embedded or is only referenced by
pointer;
  - we can record number of fields, but you said you want to enable
extensions, so it will have to be treated as minimum number of fields,
probably?

Basically, all we want to ensure is that overall memory layout is
compatible and doesn't cause any module field to be shifted.

>
> Alan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-07 22:38             ` Yonghong Song
@ 2024-02-08  0:30               ` Andrii Nakryiko
  2024-02-08  1:56                 ` Yonghong Song
  0 siblings, 1 reply; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-08  0:30 UTC (permalink / raw)
  To: Yonghong Song; +Cc: Bryce Kahle, Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On Wed, Feb 7, 2024 at 2:38 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> On 2/7/24 10:51 AM, Bryce Kahle wrote:
> > On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> >> 3) btf__dedup() will deduplicate everything, so that only unique type
> >> definitions remain.
> >>
> A random thought about another way.
> At module side, we keep
>    - module btf
>    - another section (e.g. .BTF.extra) to keep minimum kernel-side
>      types which directly used by module btf
>
>    for example, module btf has
>      struct foo {
>        struct task_struct *t;
>      }
>      module btf encoding will have id, say 20,
>      for 'struct task_struct' which is at that time
>      the id in linux kernel.
>    Then the module .BTF.extra contains
>      id 20: struct task_struct type encoding
>      there is no need to encode more types beyond pointers.
>      this can be simpler or more complex depending
>      on what to do during module load.
>
> When a module load:
>    For each .BTF.extra entry, trying to match
>    the corresponding types in the current kernel.
>    The type in the current type should have same
>    size as the one in .BTF.extra if otherwise
>    layout in the module btf may change.
>
>    If new kernel type can be used for module BTF,
>    simply replace the old id with new id in module BTF.
>
>    Otherwise, type mismatch may happen and the corresponding
>    module btf type should be invalidated.

Yes, I agree, see my reply to Alan. I'm just unsure how strict we want
to be and whether we need to record fields of expected vmlinux BTF
types. Or if just recording expected size would be enough (to ensure
correct memory layout if base BTF type is embedded into module BTF
type).

Perhaps, if BTF type is referenced from some "trusted" BTF type (used
by kfunc, or in BTF ID set) we might want to enforce strict
compatibility, but for any other type just make sure that size is
correct (if it matters at all; i.e., if base BTF type is referenced by
pointer only, we don't even need to check size).

WDYT?

>
> > Since minimization only keeps used struct and union members, couldn't
> > you have two internal types from different modules which conflict and
> > end up using the wrong offset?
> >
> > Example:
> > in module M:
> > struct S {
> > ... // other unused members
> > int x; // offset 12 (for example)
> > }
> >
> > in module N:
> > struct S {
> > ... // other unused members
> > int x; // offset 20 (something different from S.x in module M)
> > }
> >

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-08  0:30               ` Andrii Nakryiko
@ 2024-02-08  1:56                 ` Yonghong Song
  2024-02-08 23:01                   ` Alan Maguire
  0 siblings, 1 reply; 21+ messages in thread
From: Yonghong Song @ 2024-02-08  1:56 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Bryce Kahle, Bryce Kahle, Quentin Monnet, bpf, ast, daniel


On 2/7/24 4:30 PM, Andrii Nakryiko wrote:
> On Wed, Feb 7, 2024 at 2:38 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>>
>> On 2/7/24 10:51 AM, Bryce Kahle wrote:
>>> On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
>>> <andrii.nakryiko@gmail.com> wrote:
>>>> 3) btf__dedup() will deduplicate everything, so that only unique type
>>>> definitions remain.
>>>>
>> A random thought about another way.
>> At module side, we keep
>>     - module btf
>>     - another section (e.g. .BTF.extra) to keep minimum kernel-side
>>       types which directly used by module btf
>>
>>     for example, module btf has
>>       struct foo {
>>         struct task_struct *t;
>>       }
>>       module btf encoding will have id, say 20,
>>       for 'struct task_struct' which is at that time
>>       the id in linux kernel.
>>     Then the module .BTF.extra contains
>>       id 20: struct task_struct type encoding
>>       there is no need to encode more types beyond pointers.
>>       this can be simpler or more complex depending
>>       on what to do during module load.
>>
>> When a module load:
>>     For each .BTF.extra entry, trying to match
>>     the corresponding types in the current kernel.
>>     The type in the current type should have same
>>     size as the one in .BTF.extra if otherwise
>>     layout in the module btf may change.
>>
>>     If new kernel type can be used for module BTF,
>>     simply replace the old id with new id in module BTF.
>>
>>     Otherwise, type mismatch may happen and the corresponding
>>     module btf type should be invalidated.
> Yes, I agree, see my reply to Alan. I'm just unsure how strict we want
> to be and whether we need to record fields of expected vmlinux BTF
> types. Or if just recording expected size would be enough (to ensure
> correct memory layout if base BTF type is embedded into module BTF
> type).
>
> Perhaps, if BTF type is referenced from some "trusted" BTF type (used
> by kfunc, or in BTF ID set) we might want to enforce strict
> compatibility, but for any other type just make sure that size is
> correct (if it matters at all; i.e., if base BTF type is referenced by
> pointer only, we don't even need to check size).

Agree. The above is a good start. I guess some real-world investigations
can help shape the actual design about what is the minimum change to
make it work.

>
> WDYT?
>
>>> Since minimization only keeps used struct and union members, couldn't
>>> you have two internal types from different modules which conflict and
>>> end up using the wrong offset?
>>>
>>> Example:
>>> in module M:
>>> struct S {
>>> ... // other unused members
>>> int x; // offset 12 (for example)
>>> }
>>>
>>> in module N:
>>> struct S {
>>> ... // other unused members
>>> int x; // offset 20 (something different from S.x in module M)
>>> }
>>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-08  0:26       ` Andrii Nakryiko
@ 2024-02-08 22:45         ` Alan Maguire
  2024-02-09 19:50           ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Alan Maguire @ 2024-02-08 22:45 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Bryce Kahle, bpf, quentin, ast, daniel, Bryce Kahle

On 08/02/2024 00:26, Andrii Nakryiko wrote:
> On Tue, Feb 6, 2024 at 2:59 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> On 02/02/2024 22:16, Andrii Nakryiko wrote:
>>> On Wed, Jan 31, 2024 at 10:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>>>
>>>> On 30/01/2024 23:05, Bryce Kahle wrote:
>>>>> From: Bryce Kahle <bryce.kahle@datadoghq.com>
>>>>>
>>>>> Enables a user to generate minimized kernel module BTF.
>>>>>
>>>>> If an eBPF program probes a function within a kernel module or uses
>>>>> types that come from a kernel module, split BTF is required. The split
>>>>> module BTF contains only the BTF types that are unique to the module.
>>>>> It will reference the base/vmlinux BTF types and always starts its type
>>>>> IDs at X+1 where X is the largest type ID in the base BTF.
>>>>>
>>>>> Minimization allows a user to ship only the types necessary to do
>>>>> relocations for the program(s) in the provided eBPF object file(s). A
>>>>> minimized module BTF will still not contain vmlinux BTF types, so you
>>>>> should always minimize the vmlinux file first, and then minimize the
>>>>> kernel module file.
>>>>>
>>>>> Example:
>>>>>
>>>>> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
>>>>> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
>>>>
>>>> This is great! I've been working on a somewhat related problem involving
>>>> split BTF for modules, and I'm trying to figure out if there's overlap
>>>> with what you've done here that can help in either direction. I'll try
>>>> and describe what I'm doing. Sorry if this is a bit of a diversion,
>>>> but I just want to check if there are potential ways your changes could
>>>> facilitate other scenarios in the future.
>>>>
>>>> The problem I'm trying to tackle is to enable split BTF module
>>>> generation to be more resilient to underlying kernel BTF changes;
>>>> this would allow for example a module that is not built with the kernel
>>>> to generate BTF and have it work even if small changes in vmlinux occur.
>>>> Even a small change in BTF ids in base BTF is enough to invalidate the
>>>> associated split BTF, so the question is how to make this a bit less
>>>> brittle. This won't be needed for modules built along with the kernel,
>>>> but more for cases like a package delivering a kernel module.
>>>>
>>>> The way this is done is similar to what you're doing - generating
>>>> minimal base vmlinux BTF along with the module BTF. In my case however
>>>> the minimization is not driven by CO-RE relocations; rather it is driven
>>>> by only adding types that are referenced by module BTF and any other
>>>> associated types needed. We end up with minimal base BTF that is carried
>>>> along with the module BTF (in a .BTF.base_minimal section) and this
>>>> minimal BTF will be used to later reconcile module BTF with the running
>>>> kernel BTF when the module is loaded; it essentially provides the
>>>> additional information needed to map to current vmlinux types.
>>>>
>>>> In this approach, minimal vmlinux BTF is generated via an additional
>>>> option to pahole which adds an extra phase to BTF deduplication between
>>>> module and kernel. Once we have found the candidate mappings for
>>>> deduplication, we can look at all base BTF references from module BTF
>>>> and recursively add associated types to the base minimal BTF. Finally we
>>>> reparent the split BTF to this minimal base BTF. Experiments show most
>>>> modules wind up with base minimal BTF of around 4000 types, so the
>>>> minimization seems to work well. But it's complex.
>>>>
>>>> So what I've been trying to work out is if this dedup complexity can be
>>>> eliminated with your changes, but from what I can see, the membership in
>>>> the minimal base BTF in your case is driven by the CO-RE relocations
>>>> used in the BPF program. Would there do you think be a future where we
>>>> would look at doing base minimal BTF generation by other criteria (like
>>>> references from the module BTF)? Thanks!
>>>
>>> Hm... I might be misremembering or missing something, but the problem
>>> you are solving doesn't seem to be related to BTF minimization. I also
>>> forgot why you need BTF deduplication, I vaguely remember we needed to
>>> remember "expectations" of types that module BTF references in vmlinux
>>> BTF, but I fail to remember why we needed dedup... Perhaps we need a
>>> BPF office hours session to go over details again?
>>>
>>
>> Yeah, that would be great! I've put
>>
>> Making split BTF more resilient
>>
>> ..on the agenda for 02-15.
>>
>> The reason BTF minimization comes into the picture is this - the
>> expectations split BTF can have of base BTF can be quite complex, and in
>> figuring out ways to represent them, it occurred that BTF itself - in
>> the form of the minimal BTF needed to represent those split BTF
>> references - made sense. Consider cases like a split BTF struct that
>> contains a base BTF struct embedded in it. If we have a minimal base BTF
>> which contains such needed base types, we are in a position to use it to
>> later reconcile the base BTF worlds at encoding time and use time (for
>> example vmlinux BTF at module build time versus current vmlinux BTF).
>>
>> Further, a natural time to construct that minimal base BTF presents
>> itself when we do deduplication between split and base BTF.  The phase
>> after we have mapped split types to canonical types is the ideal time to
>> handle this; the algorithm is basically
>>
>> - foreach reference from split -> base BTF
>>  - add it to base minimal BTF
>> This is controlled by a new dedup option - gen_base_btf_minimal - which
>> would be enabled via  a ---btf_features option to pahole for users who
>> wanted to generate minimal base BTF. pahole places the new minimized
>> base BTF in .BTF.base_minimal section, with the split BTF referring to
>> it in the usual .BTF section. Later this base minimal BTF is used to
>> reconcile the split BTF expectations with current base BTF.
>>
>> The kinds of minimizations I see are pretty reasonable for kernel
>> modules; I tried a number of in-tree modules (which wouldn't use this
>> feature in practice, just wanted to have something to test with), and
>> around 4000 types were observed in base minimal BTF.
>>
>> It's possible we could adapt this minimization process to be guided
>> by CO-RE relocations (rather than split->base BTF references), if that
>> would help Bryce's case.
> 
> I think this minimization idea is overcomplicating anything. First, we
> don't have CO-RE relocations, and from BTF alone we don't know what
> fields of base BTF structs module is referencing (that may or may not
> be in DWARF). So I don't think there is anything to minimize.
> 

The minimization is a method to capture expectations of base BTF similar
to what you describe below. In the approach I've been pursuing, we
capture those expectations via the minimal base BTF needed to represent
the types the module needs.

> On the other hand, it seems reasonable to record a few basic things
> about base BTF type expectations:
>   - name
>   - size and whether that size has to be exact. This would be
> determined if base BTF type is ever embedded or is only referenced by
> pointer;
>   - we can record number of fields, but you said you want to enable
> extensions, so it will have to be treated as minimum number of fields,
> probably?
>

Yeah, the motivation here is that often when changes are backported to
stable release-based distros, the associated struct changes try to fill
holes in existing structures so that overall structure size does not
change in an incompatible way, and any modules that utilize such
structures continue to work.

> Basically, all we want to ensure is that overall memory layout is
> compatible and doesn't cause any module field to be shifted.
>

There are a few other gotchas though. Consider the case of an enum; if
the values associated with it get shifted between the time the module is
built and the time it is used, and ENUM_VAL_X that was 1 when the module
was built, but is now 2 in base vmlinux, we'd need to track that as an
incompatibility too.

A minimized view of base BTF - driven by the types the module needs -
can capture these changes along with the field offset/size issues. The
approach I use today also avoids expanding types unnecessarily; when it
encounters a pointer to struct foo in the module representation only,
the minimized base BTF will just use a fwd representation of that struct
in minimal base BTF.

So to summarize, base BTF minimization is driven by the need to capture
the set of expectations the module has, similar to what you describe above.

Alan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-08  1:56                 ` Yonghong Song
@ 2024-02-08 23:01                   ` Alan Maguire
  2024-02-09 19:58                     ` Andrii Nakryiko
  0 siblings, 1 reply; 21+ messages in thread
From: Alan Maguire @ 2024-02-08 23:01 UTC (permalink / raw)
  To: Yonghong Song, Andrii Nakryiko
  Cc: Bryce Kahle, Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On 08/02/2024 01:56, Yonghong Song wrote:
> 
> On 2/7/24 4:30 PM, Andrii Nakryiko wrote:
>> On Wed, Feb 7, 2024 at 2:38 PM Yonghong Song <yonghong.song@linux.dev>
>> wrote:
>>>
>>> On 2/7/24 10:51 AM, Bryce Kahle wrote:
>>>> On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
>>>> <andrii.nakryiko@gmail.com> wrote:
>>>>> 3) btf__dedup() will deduplicate everything, so that only unique type
>>>>> definitions remain.
>>>>>
>>> A random thought about another way.
>>> At module side, we keep
>>>     - module btf
>>>     - another section (e.g. .BTF.extra) to keep minimum kernel-side
>>>       types which directly used by module btf
>>>

Yep, that's exactly the approach I was pursuing; an extra section
containing those types (I was calling it .BTF.base_minimal).

>>>     for example, module btf has
>>>       struct foo {
>>>         struct task_struct *t;
>>>       }
>>>       module btf encoding will have id, say 20,
>>>       for 'struct task_struct' which is at that time
>>>       the id in linux kernel.
>>>     Then the module .BTF.extra contains
>>>       id 20: struct task_struct type encoding
>>>       there is no need to encode more types beyond pointers.
>>>       this can be simpler or more complex depending
>>>       on what to do during module load.
>>>

Right, or in BTF you can use a FWD declaration for task_struct. The
approach I'm using explicitly identifies types that are only
pointer-referenced and uses FWDS for them, and this helps keep the
representation as small as possible.

>>> When a module load:
>>>     For each .BTF.extra entry, trying to match
>>>     the corresponding types in the current kernel.
>>>     The type in the current type should have same
>>>     size as the one in .BTF.extra if otherwise
>>>     layout in the module btf may change.
>>>
>>>     If new kernel type can be used for module BTF,
>>>     simply replace the old id with new id in module BTF.
>>>
>>>     Otherwise, type mismatch may happen and the corresponding
>>>     module btf type should be invalidated.

Yep, this is the process I describe as reconciliation; where we make
sure base BTF at encoding time and current vmlinux BTF are compatible,
and if so we renumber base BTF references in the module using the
current vmlinux BTF ids. So if compatible, after reconciliation the
module BTF looks just like any other module BTF built against that exact
vmlinux.

>> Yes, I agree, see my reply to Alan. I'm just unsure how strict we want
>> to be and whether we need to record fields of expected vmlinux BTF
>> types. Or if just recording expected size would be enough (to ensure
>> correct memory layout if base BTF type is embedded into module BTF
>> type).
>>
>> Perhaps, if BTF type is referenced from some "trusted" BTF type (used
>> by kfunc, or in BTF ID set) we might want to enforce strict
>> compatibility, but for any other type just make sure that size is
>> correct (if it matters at all; i.e., if base BTF type is referenced by
>> pointer only, we don't even need to check size).
> 
> Agree. The above is a good start. I guess some real-world investigations
> can help shape the actual design about what is the minimum change to
> make it work.
> 

I'll try and send a pointer to the work-in-progress code prior to the
BPF office hours next week. In investigating how much info is required,
for most in-tree modules (which I force-built with minimal BTF) we ended
up with information about 4000 types or so. So it's a significant
minimization compared to vmlinux BTF.

In this context, perhaps my describing the information we collect about
base BTF as minimization is misleading; the intent is really focused not
on making base BTF small (although of course that's important from a
practical perspective), but collecting the info about base BTF needed to
later reconcile it with the running kernel at load time. Maybe
.BTF.base_expects or something like that might make this clearer? Thanks!

Alan
>>
>> WDYT?
>>
>>>> Since minimization only keeps used struct and union members, couldn't
>>>> you have two internal types from different modules which conflict and
>>>> end up using the wrong offset?
>>>>
>>>> Example:
>>>> in module M:
>>>> struct S {
>>>> ... // other unused members
>>>> int x; // offset 12 (for example)
>>>> }
>>>>
>>>> in module N:
>>>> struct S {
>>>> ... // other unused members
>>>> int x; // offset 20 (something different from S.x in module M)
>>>> }
>>>>
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-08 22:45         ` Alan Maguire
@ 2024-02-09 19:50           ` Andrii Nakryiko
  0 siblings, 0 replies; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-09 19:50 UTC (permalink / raw)
  To: Alan Maguire; +Cc: Bryce Kahle, bpf, quentin, ast, daniel, Bryce Kahle

On Thu, Feb 8, 2024 at 2:45 PM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 08/02/2024 00:26, Andrii Nakryiko wrote:
> > On Tue, Feb 6, 2024 at 2:59 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> On 02/02/2024 22:16, Andrii Nakryiko wrote:
> >>> On Wed, Jan 31, 2024 at 10:47 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>>>
> >>>> On 30/01/2024 23:05, Bryce Kahle wrote:
> >>>>> From: Bryce Kahle <bryce.kahle@datadoghq.com>
> >>>>>
> >>>>> Enables a user to generate minimized kernel module BTF.
> >>>>>
> >>>>> If an eBPF program probes a function within a kernel module or uses
> >>>>> types that come from a kernel module, split BTF is required. The split
> >>>>> module BTF contains only the BTF types that are unique to the module.
> >>>>> It will reference the base/vmlinux BTF types and always starts its type
> >>>>> IDs at X+1 where X is the largest type ID in the base BTF.
> >>>>>
> >>>>> Minimization allows a user to ship only the types necessary to do
> >>>>> relocations for the program(s) in the provided eBPF object file(s). A
> >>>>> minimized module BTF will still not contain vmlinux BTF types, so you
> >>>>> should always minimize the vmlinux file first, and then minimize the
> >>>>> kernel module file.
> >>>>>
> >>>>> Example:
> >>>>>
> >>>>> bpftool gen min_core_btf vmlinux.btf vm-min.btf prog.bpf.o
> >>>>> bpftool -B vm-min.btf gen min_core_btf mod.btf mod-min.btf prog.bpf.o
> >>>>
> >>>> This is great! I've been working on a somewhat related problem involving
> >>>> split BTF for modules, and I'm trying to figure out if there's overlap
> >>>> with what you've done here that can help in either direction. I'll try
> >>>> and describe what I'm doing. Sorry if this is a bit of a diversion,
> >>>> but I just want to check if there are potential ways your changes could
> >>>> facilitate other scenarios in the future.
> >>>>
> >>>> The problem I'm trying to tackle is to enable split BTF module
> >>>> generation to be more resilient to underlying kernel BTF changes;
> >>>> this would allow for example a module that is not built with the kernel
> >>>> to generate BTF and have it work even if small changes in vmlinux occur.
> >>>> Even a small change in BTF ids in base BTF is enough to invalidate the
> >>>> associated split BTF, so the question is how to make this a bit less
> >>>> brittle. This won't be needed for modules built along with the kernel,
> >>>> but more for cases like a package delivering a kernel module.
> >>>>
> >>>> The way this is done is similar to what you're doing - generating
> >>>> minimal base vmlinux BTF along with the module BTF. In my case however
> >>>> the minimization is not driven by CO-RE relocations; rather it is driven
> >>>> by only adding types that are referenced by module BTF and any other
> >>>> associated types needed. We end up with minimal base BTF that is carried
> >>>> along with the module BTF (in a .BTF.base_minimal section) and this
> >>>> minimal BTF will be used to later reconcile module BTF with the running
> >>>> kernel BTF when the module is loaded; it essentially provides the
> >>>> additional information needed to map to current vmlinux types.
> >>>>
> >>>> In this approach, minimal vmlinux BTF is generated via an additional
> >>>> option to pahole which adds an extra phase to BTF deduplication between
> >>>> module and kernel. Once we have found the candidate mappings for
> >>>> deduplication, we can look at all base BTF references from module BTF
> >>>> and recursively add associated types to the base minimal BTF. Finally we
> >>>> reparent the split BTF to this minimal base BTF. Experiments show most
> >>>> modules wind up with base minimal BTF of around 4000 types, so the
> >>>> minimization seems to work well. But it's complex.
> >>>>
> >>>> So what I've been trying to work out is if this dedup complexity can be
> >>>> eliminated with your changes, but from what I can see, the membership in
> >>>> the minimal base BTF in your case is driven by the CO-RE relocations
> >>>> used in the BPF program. Would there do you think be a future where we
> >>>> would look at doing base minimal BTF generation by other criteria (like
> >>>> references from the module BTF)? Thanks!
> >>>
> >>> Hm... I might be misremembering or missing something, but the problem
> >>> you are solving doesn't seem to be related to BTF minimization. I also
> >>> forgot why you need BTF deduplication, I vaguely remember we needed to
> >>> remember "expectations" of types that module BTF references in vmlinux
> >>> BTF, but I fail to remember why we needed dedup... Perhaps we need a
> >>> BPF office hours session to go over details again?
> >>>
> >>
> >> Yeah, that would be great! I've put
> >>
> >> Making split BTF more resilient
> >>
> >> ..on the agenda for 02-15.
> >>
> >> The reason BTF minimization comes into the picture is this - the
> >> expectations split BTF can have of base BTF can be quite complex, and in
> >> figuring out ways to represent them, it occurred that BTF itself - in
> >> the form of the minimal BTF needed to represent those split BTF
> >> references - made sense. Consider cases like a split BTF struct that
> >> contains a base BTF struct embedded in it. If we have a minimal base BTF
> >> which contains such needed base types, we are in a position to use it to
> >> later reconcile the base BTF worlds at encoding time and use time (for
> >> example vmlinux BTF at module build time versus current vmlinux BTF).
> >>
> >> Further, a natural time to construct that minimal base BTF presents
> >> itself when we do deduplication between split and base BTF.  The phase
> >> after we have mapped split types to canonical types is the ideal time to
> >> handle this; the algorithm is basically
> >>
> >> - foreach reference from split -> base BTF
> >>  - add it to base minimal BTF
> >> This is controlled by a new dedup option - gen_base_btf_minimal - which
> >> would be enabled via  a ---btf_features option to pahole for users who
> >> wanted to generate minimal base BTF. pahole places the new minimized
> >> base BTF in .BTF.base_minimal section, with the split BTF referring to
> >> it in the usual .BTF section. Later this base minimal BTF is used to
> >> reconcile the split BTF expectations with current base BTF.
> >>
> >> The kinds of minimizations I see are pretty reasonable for kernel
> >> modules; I tried a number of in-tree modules (which wouldn't use this
> >> feature in practice, just wanted to have something to test with), and
> >> around 4000 types were observed in base minimal BTF.
> >>
> >> It's possible we could adapt this minimization process to be guided
> >> by CO-RE relocations (rather than split->base BTF references), if that
> >> would help Bryce's case.
> >
> > I think this minimization idea is overcomplicating anything. First, we
> > don't have CO-RE relocations, and from BTF alone we don't know what
> > fields of base BTF structs module is referencing (that may or may not
> > be in DWARF). So I don't think there is anything to minimize.
> >
>
> The minimization is a method to capture expectations of base BTF similar

Important part of btfgen's minimization is about keeping only used
fields (according to CO-RE relocs) and stripping away everything else.
Your "minimization" is quite different, and so referring to both as
"minimization" is just going to confuse things.

> to what you describe below. In the approach I've been pursuing, we
> capture those expectations via the minimal base BTF needed to represent
> the types the module needs.
>
> > On the other hand, it seems reasonable to record a few basic things
> > about base BTF type expectations:
> >   - name
> >   - size and whether that size has to be exact. This would be
> > determined if base BTF type is ever embedded or is only referenced by
> > pointer;
> >   - we can record number of fields, but you said you want to enable
> > extensions, so it will have to be treated as minimum number of fields,
> > probably?
> >
>
> Yeah, the motivation here is that often when changes are backported to
> stable release-based distros, the associated struct changes try to fill
> holes in existing structures so that overall structure size does not
> change in an incompatible way, and any modules that utilize such
> structures continue to work.
>
> > Basically, all we want to ensure is that overall memory layout is
> > compatible and doesn't cause any module field to be shifted.
> >
>
> There are a few other gotchas though. Consider the case of an enum; if
> the values associated with it get shifted between the time the module is
> built and the time it is used, and ENUM_VAL_X that was 1 when the module
> was built, but is now 2 in base vmlinux, we'd need to track that as an
> incompatibility too.

Enum case is a bit weird. If enum is defined in vmlinux BTF, then the
base kernel is built and using that definition of enum, right? So even
if a module's enum definition is different (different integer values),
base's enum definition should probably be used instead in BTF, no?

>
> A minimized view of base BTF - driven by the types the module needs -
> can capture these changes along with the field offset/size issues. The
> approach I use today also avoids expanding types unnecessarily; when it
> encounters a pointer to struct foo in the module representation only,
> the minimized base BTF will just use a fwd representation of that struct
> in minimal base BTF.

So this is basically the only common part with btfgen's minimization,
but overall they are quite different, which is why I'm suggesting to
not combine them.

>
> So to summarize, base BTF minimization is driven by the need to capture
> the set of expectations the module has, similar to what you describe above.
>
> Alan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-08 23:01                   ` Alan Maguire
@ 2024-02-09 19:58                     ` Andrii Nakryiko
  0 siblings, 0 replies; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-09 19:58 UTC (permalink / raw)
  To: Alan Maguire
  Cc: Yonghong Song, Bryce Kahle, Bryce Kahle, Quentin Monnet, bpf,
	ast, daniel

On Thu, Feb 8, 2024 at 3:01 PM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 08/02/2024 01:56, Yonghong Song wrote:
> >
> > On 2/7/24 4:30 PM, Andrii Nakryiko wrote:
> >> On Wed, Feb 7, 2024 at 2:38 PM Yonghong Song <yonghong.song@linux.dev>
> >> wrote:
> >>>
> >>> On 2/7/24 10:51 AM, Bryce Kahle wrote:
> >>>> On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
> >>>> <andrii.nakryiko@gmail.com> wrote:
> >>>>> 3) btf__dedup() will deduplicate everything, so that only unique type
> >>>>> definitions remain.
> >>>>>
> >>> A random thought about another way.
> >>> At module side, we keep
> >>>     - module btf
> >>>     - another section (e.g. .BTF.extra) to keep minimum kernel-side
> >>>       types which directly used by module btf
> >>>
>
> Yep, that's exactly the approach I was pursuing; an extra section
> containing those types (I was calling it .BTF.base_minimal).
>
> >>>     for example, module btf has
> >>>       struct foo {
> >>>         struct task_struct *t;
> >>>       }
> >>>       module btf encoding will have id, say 20,
> >>>       for 'struct task_struct' which is at that time
> >>>       the id in linux kernel.
> >>>     Then the module .BTF.extra contains
> >>>       id 20: struct task_struct type encoding
> >>>       there is no need to encode more types beyond pointers.
> >>>       this can be simpler or more complex depending
> >>>       on what to do during module load.
> >>>
>
> Right, or in BTF you can use a FWD declaration for task_struct. The
> approach I'm using explicitly identifies types that are only
> pointer-referenced and uses FWDS for them, and this helps keep the
> representation as small as possible.
>
> >>> When a module load:
> >>>     For each .BTF.extra entry, trying to match
> >>>     the corresponding types in the current kernel.
> >>>     The type in the current type should have same
> >>>     size as the one in .BTF.extra if otherwise
> >>>     layout in the module btf may change.
> >>>
> >>>     If new kernel type can be used for module BTF,
> >>>     simply replace the old id with new id in module BTF.
> >>>
> >>>     Otherwise, type mismatch may happen and the corresponding
> >>>     module btf type should be invalidated.
>
> Yep, this is the process I describe as reconciliation; where we make
> sure base BTF at encoding time and current vmlinux BTF are compatible,
> and if so we renumber base BTF references in the module using the
> current vmlinux BTF ids. So if compatible, after reconciliation the
> module BTF looks just like any other module BTF built against that exact
> vmlinux.
>
> >> Yes, I agree, see my reply to Alan. I'm just unsure how strict we want
> >> to be and whether we need to record fields of expected vmlinux BTF
> >> types. Or if just recording expected size would be enough (to ensure
> >> correct memory layout if base BTF type is embedded into module BTF
> >> type).
> >>
> >> Perhaps, if BTF type is referenced from some "trusted" BTF type (used
> >> by kfunc, or in BTF ID set) we might want to enforce strict
> >> compatibility, but for any other type just make sure that size is
> >> correct (if it matters at all; i.e., if base BTF type is referenced by
> >> pointer only, we don't even need to check size).
> >
> > Agree. The above is a good start. I guess some real-world investigations
> > can help shape the actual design about what is the minimum change to
> > make it work.
> >
>
> I'll try and send a pointer to the work-in-progress code prior to the
> BPF office hours next week. In investigating how much info is required,
> for most in-tree modules (which I force-built with minimal BTF) we ended
> up with information about 4000 types or so. So it's a significant
> minimization compared to vmlinux BTF.

4000 is still quite a lot and is a significant fraction of vmlinux BTF
types. I'm curious if you measured the size increase from recording
these types?

>
> In this context, perhaps my describing the information we collect about
> base BTF as minimization is misleading; the intent is really focused not
> on making base BTF small (although of course that's important from a
> practical perspective), but collecting the info about base BTF needed to
> later reconcile it with the running kernel at load time. Maybe
> .BTF.base_expects or something like that might make this clearer? Thanks!
>
> Alan
> >>
> >> WDYT?
> >>
> >>>> Since minimization only keeps used struct and union members, couldn't
> >>>> you have two internal types from different modules which conflict and
> >>>> end up using the wrong offset?
> >>>>
> >>>> Example:
> >>>> in module M:
> >>>> struct S {
> >>>> ... // other unused members
> >>>> int x; // offset 12 (for example)
> >>>> }
> >>>>
> >>>> in module N:
> >>>> struct S {
> >>>> ... // other unused members
> >>>> int x; // offset 20 (something different from S.x in module M)
> >>>> }
> >>>>
> >

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-05 18:21         ` Andrii Nakryiko
  2024-02-07 18:51           ` Bryce Kahle
@ 2024-02-26 21:48           ` Bryce Kahle
  2024-02-29  0:59             ` Andrii Nakryiko
  1 sibling, 1 reply; 21+ messages in thread
From: Bryce Kahle @ 2024-02-26 21:48 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On Fri, Feb 2, 2024, at 2:10 PM, Andrii Nakryiko wrote:
> But yes, you'd have to specify both vmlinux and all the module
> BTFs at the same time (which bpftool allows you to do easily with its
> CLI interface, so not really a problem)

I didn't see a way to specify a directory for vmlinux and all the
modules BTFs. Are you suggesting I specify the path to each
individually? I didn't see a way to do that with the current CLI api.
It assumes that the input is only a single path.

On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> 2) append each module BTF to that instance (we have btf__add_btf() API
> already for that). This will rewrite type IDs for each module
> (shifting them by some constant number)

It looks like btf__add_btf() doesn't support split BTF, which the
module BTF has to be in order for it to parse correctly. Any
suggestions for how to proceed? Do I need to add support for split BTF
to btf__add_btf() to libbpf? If so, any thoughts on how that should
work would be appreciated.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-26 21:48           ` Bryce Kahle
@ 2024-02-29  0:59             ` Andrii Nakryiko
  2024-02-29 15:24               ` Quentin Monnet
  0 siblings, 1 reply; 21+ messages in thread
From: Andrii Nakryiko @ 2024-02-29  0:59 UTC (permalink / raw)
  To: Bryce Kahle; +Cc: Bryce Kahle, Quentin Monnet, bpf, ast, daniel

On Mon, Feb 26, 2024 at 1:48 PM Bryce Kahle <bryce.kahle@datadoghq.com> wrote:
>
> On Fri, Feb 2, 2024, at 2:10 PM, Andrii Nakryiko wrote:
> > But yes, you'd have to specify both vmlinux and all the module
> > BTFs at the same time (which bpftool allows you to do easily with its
> > CLI interface, so not really a problem)
>
> I didn't see a way to specify a directory for vmlinux and all the
> modules BTFs. Are you suggesting I specify the path to each
> individually? I didn't see a way to do that with the current CLI api.
> It assumes that the input is only a single path.

so right now we have

bpftool min_core_btf <input-btf> <output-btf> <input1.bpf.o> ... <inputN.bpf.o>

so we'd have to either add a flag and do

bpftool min_core_btf <input-btf> -E <extra-btf1> -E <extra-btf2>
<output-btf> ...

or define special key/value pair (we do that for other commands to
specify extra options):

bpftool min_core_btf <input-btf> extra <extra-btf-1> extra
<extra-btf-2> <output-btf> ....

This has a tiny chance that user used "extra" as a name of one of
input object file (we can probably disregard).

Yet another option is to introduce new command, something like
`bpftool min_core_btf_multi ...` and define new convention.


OR. We can pivot and say that we do what you want as two steps:

1) generate one large combined BTF from multiple BTFs, something along
the lines of `bpftool btf merge <btf1> ... <btfN>`. We'd need to
specify how split BTF should be handled.

2) then use existing min_core_btf command with this merged BTF

I don't know what's best.

>
> On Mon, Feb 5, 2024 at 10:21 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> > 2) append each module BTF to that instance (we have btf__add_btf() API
> > already for that). This will rewrite type IDs for each module
> > (shifting them by some constant number)
>
> It looks like btf__add_btf() doesn't support split BTF, which the
> module BTF has to be in order for it to parse correctly. Any
> suggestions for how to proceed? Do I need to add support for split BTF
> to btf__add_btf() to libbpf? If so, any thoughts on how that should
> work would be appreciated.

Yep, seems like it is explicitly not supported. I think one of the
problems with split BTF is that you don't want to append shared types
from base BTF, because that would be a waste. So you need a variant of
btf__add_btf() that allows you to specify a range of types to append.
But at that point what to do if some of the added types reference
types that were not appended? A bunch of unpleasant issues to be dealt
with.

So perhaps instead of using btf__add_btf(), bpftool can just do
btf__add_type() API, which remaps strings properly, but doesn't touch
type IDs. Libbpf has internal btf_type_visit_type_ids() function that
calls provided callback for each field that contains type ID. bpftool
can do its own remapping.

If we assume that we are merging vmlinux BTF and a bunch of module
BTFs, then this remapping is actually pretty straightforward: if type
ID belongs to base BTF, don't touch it. Otherwise shift it by some
amount, common for each type within the module's BTF.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf
  2024-02-29  0:59             ` Andrii Nakryiko
@ 2024-02-29 15:24               ` Quentin Monnet
  0 siblings, 0 replies; 21+ messages in thread
From: Quentin Monnet @ 2024-02-29 15:24 UTC (permalink / raw)
  To: Andrii Nakryiko, Bryce Kahle; +Cc: Bryce Kahle, bpf, ast, daniel

2024-02-29 00:59 UTC+0000 ~ Andrii Nakryiko <andrii.nakryiko@gmail.com>
> On Mon, Feb 26, 2024 at 1:48 PM Bryce Kahle <bryce.kahle@datadoghq.com> wrote:
>>
>> On Fri, Feb 2, 2024, at 2:10 PM, Andrii Nakryiko wrote:
>>> But yes, you'd have to specify both vmlinux and all the module
>>> BTFs at the same time (which bpftool allows you to do easily with its
>>> CLI interface, so not really a problem)
>>
>> I didn't see a way to specify a directory for vmlinux and all the
>> modules BTFs. Are you suggesting I specify the path to each
>> individually? I didn't see a way to do that with the current CLI api.
>> It assumes that the input is only a single path.
> 
> so right now we have
> 
> bpftool min_core_btf <input-btf> <output-btf> <input1.bpf.o> ... <inputN.bpf.o>
> 
> so we'd have to either add a flag and do
> 
> bpftool min_core_btf <input-btf> -E <extra-btf1> -E <extra-btf2>
> <output-btf> ...
> 
> or define special key/value pair (we do that for other commands to
> specify extra options):
> 
> bpftool min_core_btf <input-btf> extra <extra-btf-1> extra
> <extra-btf-2> <output-btf> ....
> 
> This has a tiny chance that user used "extra" as a name of one of
> input object file (we can probably disregard).
> 
> Yet another option is to introduce new command, something like
> `bpftool min_core_btf_multi ...` and define new convention.
> 
> 
> OR. We can pivot and say that we do what you want as two steps:
> 
> 1) generate one large combined BTF from multiple BTFs, something along
> the lines of `bpftool btf merge <btf1> ... <btfN>`. We'd need to
> specify how split BTF should be handled.
> 
> 2) then use existing min_core_btf command with this merged BTF
> 
> I don't know what's best.

The two steps is maybe the cleanest option with regards to the command
line syntax, but it doesn't feel great to impose an additional step to
the user just because we don't want to rework the syntax, I suppose.

I'm not a fan of the multiple "-E" flags, it's not super consistent with
what we have for other commands. I'd probably go with the "extra"
keywords, or the new subcommand name (and mark the former as deprecated?).

Yet another option could be to support two alternatives syntaxes for the
existing subcommand if the first argument is, say, "input_btf" (and then
define this new syntax using keywords for all arguments).

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2024-02-29 15:24 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-30 23:05 [PATCH bpf-next v4] bpftool: add support for split BTF to gen min_core_btf Bryce Kahle
2024-01-31 18:13 ` Alan Maguire
2024-02-02 22:16   ` Andrii Nakryiko
2024-02-06 10:59     ` Alan Maguire
2024-02-08  0:26       ` Andrii Nakryiko
2024-02-08 22:45         ` Alan Maguire
2024-02-09 19:50           ` Andrii Nakryiko
2024-02-01  0:54 ` Quentin Monnet
2024-02-01 21:05   ` Bryce Kahle
2024-02-02 22:10     ` Andrii Nakryiko
2024-02-03  0:58       ` Bryce Kahle
2024-02-05 18:21         ` Andrii Nakryiko
2024-02-07 18:51           ` Bryce Kahle
2024-02-07 22:38             ` Yonghong Song
2024-02-08  0:30               ` Andrii Nakryiko
2024-02-08  1:56                 ` Yonghong Song
2024-02-08 23:01                   ` Alan Maguire
2024-02-09 19:58                     ` Andrii Nakryiko
2024-02-26 21:48           ` Bryce Kahle
2024-02-29  0:59             ` Andrii Nakryiko
2024-02-29 15:24               ` Quentin Monnet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.