bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next] btf: expose BTF info through sysfs
@ 2019-08-07 18:38 Andrii Nakryiko
  2019-08-08  6:09 ` Greg KH
  2019-08-08 10:55 ` Jiri Olsa
  0 siblings, 2 replies; 4+ messages in thread
From: Andrii Nakryiko @ 2019-08-07 18:38 UTC (permalink / raw)
  To: bpf, netdev, ast, daniel, yhs
  Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Masahiro Yamada,
	Arnaldo Carvalho de Melo, Jiri Olsa

Make .BTF section allocated and expose its contents through sysfs.

/sys/kernel/btf directory is created to contain all the BTFs present
inside kernel. Currently there is only kernel's main BTF, represented as
/sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
each module will expose its BTF as /sys/kernel/btf/<module-name> file.

Current approach relies on a few pieces coming together:
1. pahole is used to take almost final vmlinux image (modulo .BTF and
   kallsyms) and generate .BTF section by converting DWARF info into
   BTF. This section is not allocated and not mapped to any segment,
   though, so is not yet accessible from inside kernel at runtime.
2. objcopy dumps .BTF contents into binary file and subsequently
   convert binary file into linkable object file with automatically
   generated symbols _binary__btf_kernel_bin_start and
   _binary__btf_kernel_bin_end, pointing to start and end, respectively,
   of BTF raw data.
3. final vmlinux image is generated by linking this object file (and
   kallsyms, if necessary). sysfs_btf.c then creates
   /sys/kernel/btf/kernel file and exposes embedded BTF contents through
   it. This allows, e.g., libbpf and bpftool access BTF info at
   well-known location, without resorting to searching for vmlinux image
   on disk (location of which is not standardized and vmlinux image
   might not be even available in some scenarios, e.g., inside qemu
   during testing).

Alternative approach using .incbin assembler directive to embed BTF
contents directly was attempted but didn't work, because sysfs_proc.o is
not re-compiled during link-vmlinux.sh stage. This is required, though,
to update embedded BTF data (initially empty data is embedded, then
pahole generates BTF info and we need to regenerate sysfs_btf.o with
updated contents, but it's too late at that point).

If BTF couldn't be generated due to missing or too old pahole,
sysfs_btf.c handles that gracefully by detecting that
_binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
/sys/kernel/btf at all.

Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 kernel/bpf/Makefile     |  3 +++
 kernel/bpf/sysfs_btf.c  | 52 ++++++++++++++++++++++++++++++++++++++++
 scripts/link-vmlinux.sh | 53 ++++++++++++++++++++++++++---------------
 3 files changed, 89 insertions(+), 19 deletions(-)
 create mode 100644 kernel/bpf/sysfs_btf.c

diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 29d781061cd5..e1d9adb212f9 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
 ifeq ($(CONFIG_INET),y)
 obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
 endif
+ifeq ($(CONFIG_SYSFS),y)
+obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
+endif
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
new file mode 100644
index 000000000000..ac06ce1d62e8
--- /dev/null
+++ b/kernel/bpf/sysfs_btf.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide kernel BTF information for introspection and use by eBPF tools.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+#include <linux/init.h>
+
+/* See scripts/link-vmlinux.sh, gen_btf() func for details */
+extern char __weak _binary__btf_kernel_bin_start[];
+extern char __weak _binary__btf_kernel_bin_end[];
+
+static ssize_t
+btf_kernel_read(struct file *file, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
+		char *buf, loff_t off, size_t len)
+{
+	memcpy(buf, _binary__btf_kernel_bin_start + off, len);
+	return len;
+}
+
+static struct bin_attribute btf_kernel_attr __ro_after_init = {
+	.attr = {
+		.name = "kernel",
+		.mode = 0444,
+	},
+	.read = btf_kernel_read,
+};
+
+static struct bin_attribute *btf_attrs[] __ro_after_init = {
+	&btf_kernel_attr,
+	NULL,
+};
+
+static struct attribute_group btf_group_attr __ro_after_init = {
+	.name = "btf",
+	.bin_attrs = btf_attrs,
+};
+
+static int __init btf_kernel_init(void)
+{
+	if (!_binary__btf_kernel_bin_start)
+		return 0;
+
+	btf_kernel_attr.size = _binary__btf_kernel_bin_end -
+			       _binary__btf_kernel_bin_start;
+
+	return sysfs_create_group(kernel_kobj, &btf_group_attr);
+}
+
+subsys_initcall(btf_kernel_init);
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a7124f895b24..a45ba0baf21f 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -56,8 +56,8 @@ modpost_link()
 }
 
 # Link of vmlinux
-# ${1} - optional extra .o files
-# ${2} - output file
+# ${1} - output file
+# ${@:2} - optional extra .o files
 vmlinux_link()
 {
 	local lds="${objtree}/${KBUILD_LDS}"
@@ -70,9 +70,9 @@ vmlinux_link()
 			--start-group				\
 			${KBUILD_VMLINUX_LIBS}			\
 			--end-group				\
-			${1}"
+			${@:2}"
 
-		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}	\
+		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1}	\
 			-T ${lds} ${objects}
 	else
 		objects="-Wl,--whole-archive			\
@@ -81,9 +81,9 @@ vmlinux_link()
 			-Wl,--start-group			\
 			${KBUILD_VMLINUX_LIBS}			\
 			-Wl,--end-group				\
-			${1}"
+			${@:2}"
 
-		${CC} ${CFLAGS_vmlinux} -o ${2}			\
+		${CC} ${CFLAGS_vmlinux} -o ${1}			\
 			-Wl,-T,${lds}				\
 			${objects}				\
 			-lutil -lrt -lpthread
@@ -92,23 +92,35 @@ vmlinux_link()
 }
 
 # generate .BTF typeinfo from DWARF debuginfo
+# ${1} - vmlinux image
+# ${2} - file to dump raw BTF data into
 gen_btf()
 {
-	local pahole_ver;
+	local pahole_ver
+	local bin_arch
 
 	if ! [ -x "$(command -v ${PAHOLE})" ]; then
 		info "BTF" "${1}: pahole (${PAHOLE}) is not available"
-		return 0
+		return 1
 	fi
 
 	pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
 	if [ "${pahole_ver}" -lt "113" ]; then
 		info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
-		return 0
+		return 1
 	fi
 
-	info "BTF" ${1}
+	vmlinux_link ${1}
+
+	info "BTF" ${2}
 	LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+
+	# dump .BTF section into raw binary file to link with final vmlinux
+	bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \
+		cut -d, -f1 | cut -d' ' -f2)
+	${OBJCOPY} --dump-section .BTF=.btf.kernel.bin ${1} 2>/dev/null
+	${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \
+		--rename-section .data=.BTF .btf.kernel.bin ${2}
 }
 
 # Create ${2} .o file with all symbols from the ${1} object file
@@ -153,6 +165,7 @@ sortextable()
 # Delete output files in case of error
 cleanup()
 {
+	rm -f .btf.*
 	rm -f .tmp_System.map
 	rm -f .tmp_kallsyms*
 	rm -f .tmp_vmlinux*
@@ -215,6 +228,13 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
 info MODINFO modules.builtin.modinfo
 ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
 
+btf_kernel_bin_o=""
+if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
+	if gen_btf .tmp_vmlinux_btf .btf.kernel.bin.o ; then
+		btf_kernel_bin_o=.btf.kernel.bin.o
+	fi
+fi
+
 kallsymso=""
 kallsyms_vmlinux=""
 if [ -n "${CONFIG_KALLSYMS}" ]; then
@@ -246,11 +266,11 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
 	kallsyms_vmlinux=.tmp_vmlinux2
 
 	# step 1
-	vmlinux_link "" .tmp_vmlinux1
+	vmlinux_link .tmp_vmlinux1 ${btf_kernel_bin_o}
 	kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o
 
 	# step 2
-	vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2
+	vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_kernel_bin_o}
 	kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o
 
 	# step 3
@@ -261,18 +281,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
 		kallsymso=.tmp_kallsyms3.o
 		kallsyms_vmlinux=.tmp_vmlinux3
 
-		vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3
-
+		vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_kernel_bin_o}
 		kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o
 	fi
 fi
 
 info LD vmlinux
-vmlinux_link "${kallsymso}" vmlinux
-
-if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
-	gen_btf vmlinux
-fi
+vmlinux_link vmlinux "${kallsymso}" "${btf_kernel_bin_o}"
 
 if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then
 	info SORTEX vmlinux
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] btf: expose BTF info through sysfs
  2019-08-07 18:38 [PATCH bpf-next] btf: expose BTF info through sysfs Andrii Nakryiko
@ 2019-08-08  6:09 ` Greg KH
  2019-08-08 10:55 ` Jiri Olsa
  1 sibling, 0 replies; 4+ messages in thread
From: Greg KH @ 2019-08-08  6:09 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, netdev, ast, daniel, yhs, andrii.nakryiko, kernel-team,
	Masahiro Yamada, Arnaldo Carvalho de Melo, Jiri Olsa

On Wed, Aug 07, 2019 at 11:38:21AM -0700, Andrii Nakryiko wrote:
> Make .BTF section allocated and expose its contents through sysfs.

Ah, found the original of this, sorry for the noise on the previous
response...

Still need Documentation/ABI/ entries though :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] btf: expose BTF info through sysfs
  2019-08-07 18:38 [PATCH bpf-next] btf: expose BTF info through sysfs Andrii Nakryiko
  2019-08-08  6:09 ` Greg KH
@ 2019-08-08 10:55 ` Jiri Olsa
  2019-08-08 10:59   ` Jiri Olsa
  1 sibling, 1 reply; 4+ messages in thread
From: Jiri Olsa @ 2019-08-08 10:55 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, netdev, ast, daniel, yhs, andrii.nakryiko, kernel-team,
	Masahiro Yamada, Arnaldo Carvalho de Melo, Jiri Olsa

On Wed, Aug 07, 2019 at 11:38:21AM -0700, Andrii Nakryiko wrote:
> Make .BTF section allocated and expose its contents through sysfs.
> 
> /sys/kernel/btf directory is created to contain all the BTFs present
> inside kernel. Currently there is only kernel's main BTF, represented as
> /sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
> each module will expose its BTF as /sys/kernel/btf/<module-name> file.
> 
> Current approach relies on a few pieces coming together:
> 1. pahole is used to take almost final vmlinux image (modulo .BTF and
>    kallsyms) and generate .BTF section by converting DWARF info into
>    BTF. This section is not allocated and not mapped to any segment,
>    though, so is not yet accessible from inside kernel at runtime.
> 2. objcopy dumps .BTF contents into binary file and subsequently
>    convert binary file into linkable object file with automatically
>    generated symbols _binary__btf_kernel_bin_start and
>    _binary__btf_kernel_bin_end, pointing to start and end, respectively,
>    of BTF raw data.
> 3. final vmlinux image is generated by linking this object file (and
>    kallsyms, if necessary). sysfs_btf.c then creates
>    /sys/kernel/btf/kernel file and exposes embedded BTF contents through
>    it. This allows, e.g., libbpf and bpftool access BTF info at
>    well-known location, without resorting to searching for vmlinux image
>    on disk (location of which is not standardized and vmlinux image
>    might not be even available in some scenarios, e.g., inside qemu
>    during testing).
> 
> Alternative approach using .incbin assembler directive to embed BTF
> contents directly was attempted but didn't work, because sysfs_proc.o is
> not re-compiled during link-vmlinux.sh stage. This is required, though,
> to update embedded BTF data (initially empty data is embedded, then
> pahole generates BTF info and we need to regenerate sysfs_btf.o with
> updated contents, but it's too late at that point).
> 
> If BTF couldn't be generated due to missing or too old pahole,
> sysfs_btf.c handles that gracefully by detecting that
> _binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
> /sys/kernel/btf at all.
> 
> Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
> Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---
>  kernel/bpf/Makefile     |  3 +++
>  kernel/bpf/sysfs_btf.c  | 52 ++++++++++++++++++++++++++++++++++++++++
>  scripts/link-vmlinux.sh | 53 ++++++++++++++++++++++++++---------------
>  3 files changed, 89 insertions(+), 19 deletions(-)
>  create mode 100644 kernel/bpf/sysfs_btf.c
> 
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 29d781061cd5..e1d9adb212f9 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
>  ifeq ($(CONFIG_INET),y)
>  obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
>  endif
> +ifeq ($(CONFIG_SYSFS),y)
> +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
> +endif
> diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
> new file mode 100644
> index 000000000000..ac06ce1d62e8
> --- /dev/null
> +++ b/kernel/bpf/sysfs_btf.c
> @@ -0,0 +1,52 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Provide kernel BTF information for introspection and use by eBPF tools.
> + */
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/kobject.h>
> +#include <linux/init.h>
> +
> +/* See scripts/link-vmlinux.sh, gen_btf() func for details */
> +extern char __weak _binary__btf_kernel_bin_start[];
> +extern char __weak _binary__btf_kernel_bin_end[];
> +
> +static ssize_t
> +btf_kernel_read(struct file *file, struct kobject *kobj,
> +		struct bin_attribute *bin_attr,
> +		char *buf, loff_t off, size_t len)
> +{
> +	memcpy(buf, _binary__btf_kernel_bin_start + off, len);

hum, should you check the end of the btf data?
maybe use the memory_read_from_buffer function instead

jirka

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH bpf-next] btf: expose BTF info through sysfs
  2019-08-08 10:55 ` Jiri Olsa
@ 2019-08-08 10:59   ` Jiri Olsa
  0 siblings, 0 replies; 4+ messages in thread
From: Jiri Olsa @ 2019-08-08 10:59 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, netdev, ast, daniel, yhs, andrii.nakryiko, kernel-team,
	Masahiro Yamada, Arnaldo Carvalho de Melo, Jiri Olsa

On Thu, Aug 08, 2019 at 12:56:01PM +0200, Jiri Olsa wrote:
> On Wed, Aug 07, 2019 at 11:38:21AM -0700, Andrii Nakryiko wrote:
> > Make .BTF section allocated and expose its contents through sysfs.
> > 
> > /sys/kernel/btf directory is created to contain all the BTFs present
> > inside kernel. Currently there is only kernel's main BTF, represented as
> > /sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
> > each module will expose its BTF as /sys/kernel/btf/<module-name> file.
> > 
> > Current approach relies on a few pieces coming together:
> > 1. pahole is used to take almost final vmlinux image (modulo .BTF and
> >    kallsyms) and generate .BTF section by converting DWARF info into
> >    BTF. This section is not allocated and not mapped to any segment,
> >    though, so is not yet accessible from inside kernel at runtime.
> > 2. objcopy dumps .BTF contents into binary file and subsequently
> >    convert binary file into linkable object file with automatically
> >    generated symbols _binary__btf_kernel_bin_start and
> >    _binary__btf_kernel_bin_end, pointing to start and end, respectively,
> >    of BTF raw data.
> > 3. final vmlinux image is generated by linking this object file (and
> >    kallsyms, if necessary). sysfs_btf.c then creates
> >    /sys/kernel/btf/kernel file and exposes embedded BTF contents through
> >    it. This allows, e.g., libbpf and bpftool access BTF info at
> >    well-known location, without resorting to searching for vmlinux image
> >    on disk (location of which is not standardized and vmlinux image
> >    might not be even available in some scenarios, e.g., inside qemu
> >    during testing).
> > 
> > Alternative approach using .incbin assembler directive to embed BTF
> > contents directly was attempted but didn't work, because sysfs_proc.o is
> > not re-compiled during link-vmlinux.sh stage. This is required, though,
> > to update embedded BTF data (initially empty data is embedded, then
> > pahole generates BTF info and we need to regenerate sysfs_btf.o with
> > updated contents, but it's too late at that point).
> > 
> > If BTF couldn't be generated due to missing or too old pahole,
> > sysfs_btf.c handles that gracefully by detecting that
> > _binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
> > /sys/kernel/btf at all.
> > 
> > Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
> > Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
> >  kernel/bpf/Makefile     |  3 +++
> >  kernel/bpf/sysfs_btf.c  | 52 ++++++++++++++++++++++++++++++++++++++++
> >  scripts/link-vmlinux.sh | 53 ++++++++++++++++++++++++++---------------
> >  3 files changed, 89 insertions(+), 19 deletions(-)
> >  create mode 100644 kernel/bpf/sysfs_btf.c
> > 
> > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> > index 29d781061cd5..e1d9adb212f9 100644
> > --- a/kernel/bpf/Makefile
> > +++ b/kernel/bpf/Makefile
> > @@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
> >  ifeq ($(CONFIG_INET),y)
> >  obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
> >  endif
> > +ifeq ($(CONFIG_SYSFS),y)
> > +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
> > +endif
> > diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
> > new file mode 100644
> > index 000000000000..ac06ce1d62e8
> > --- /dev/null
> > +++ b/kernel/bpf/sysfs_btf.c
> > @@ -0,0 +1,52 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Provide kernel BTF information for introspection and use by eBPF tools.
> > + */
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/kobject.h>
> > +#include <linux/init.h>
> > +
> > +/* See scripts/link-vmlinux.sh, gen_btf() func for details */
> > +extern char __weak _binary__btf_kernel_bin_start[];
> > +extern char __weak _binary__btf_kernel_bin_end[];
> > +
> > +static ssize_t
> > +btf_kernel_read(struct file *file, struct kobject *kobj,
> > +		struct bin_attribute *bin_attr,
> > +		char *buf, loff_t off, size_t len)
> > +{
> > +	memcpy(buf, _binary__btf_kernel_bin_start + off, len);
> 
> hum, should you check the end of the btf data?
> maybe use the memory_read_from_buffer function instead

nah, looks like that size settings will do that for you
in sysfs_kf_bin_read, nice.. nevermind then ;-)

jirka

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-08-08 10:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-07 18:38 [PATCH bpf-next] btf: expose BTF info through sysfs Andrii Nakryiko
2019-08-08  6:09 ` Greg KH
2019-08-08 10:55 ` Jiri Olsa
2019-08-08 10:59   ` Jiri Olsa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).