Dwarves Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
@ 2020-06-08 17:34 Hao Luo
       [not found] ` <20200608173403.151706-1-haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Hao Luo @ 2020-06-08 17:34 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, Alexei Starovoitov,
	daniel-FeC+5ew28dpmcu3hnIyYJQ, olegrom-hpIqsD4AKlfQT0dZR+AlfA,
	kafai-b10kYP2dOMg, dwarves-u79uwXL29TY76Z2rM5mHXA, Hao Luo

On SMP systems, the global percpu variables are placed in a special
'.data..percpu' section, which is stored in a segment whose initial
address is set to 0, the addresses of per-CPU variables are relative
positive addresses [1].

This patch extracts these variables from vmlinux and places them with
their type information in BTF. More specifically, when BTF is encoded,
we find the index of the '.data..percpu' section and then traverse
the symbol table to find those global objects which are in this section.
For each of these objects, we push a BTF_KIND_VAR into the types buffer,
and a BTF_VAR_SECINFO into another buffer, percpu_secinfo. When all the
CUs have finished processing, we push a BTF_KIND_DATASEC into the
btfe->types buffer, followed by the percpu_secinfo's content.

In a v5.7-rc7 linux kernel, I was able to extract 291 such variables.
The build time overhead is small and the space overhead is also small.

Testing:

Before:
 $ readelf -SW vmlinux | grep BTF
 [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d2bf8 00   A  0   0  1

After:
 $ pahole -J vmlinux
 $ readelf -SW vmlinux  | grep BTF
 [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d5bca 00   A  0   0  1

Common percpu vars can be found in the BTF section.

 $ bpftool btf dump file vmlinux | grep runqueues
 [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc

 $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
 [17592] STRUCT 'cpu_stopper' size=72 vlen=5
 [17612] VAR 'cpu_stopper' type_id=17592, linkage=static

 $ bpftool btf dump file vmlinux | grep ' DATASEC '
 [63652] DATASEC '.data..percpu' size=0 vlen=294

References:
 [1] https://lwn.net/Articles/531148/
Signed-off-by: Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
Changelog since v2:
- Move finding percpu_shndx and extracting symtab into btfe creation,
  so we don't have to allocate a new symtab for each CU.
- More debug msg by logging the vars encoded in 'verbose' mode. We
  probably don't want to log the symbols that are _not_ encoded,
  since that would be too verbose.
- Calculate var offsets using 'addr - shdr.sh_addr', so it could be
  generalized to other sections in future.
- Filter out the symbols that are not STT_OBJECT.
- Sort var_secinfos in the DATASEC by their offsets.
- Free 'persec_secinfo' buffer and 'symtab' in btfe deletion.
- Replace the string ".data..percpu" with a constant PERCPU_SECTION.

Changelog since v1:
- Add a ".data..percpu" DATASEC that encodes the found VARs.
- Use percpu section's shndx to find the symbols that are percpu variables.
- Use the correct type to set VAR's linkage.

 btf_encoder.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
 dwarves.c     |   6 +++
 dwarves.h     |   2 +
 libbtf.c      | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libbtf.h      |  12 +++++
 pahole.c      |   1 +
 6 files changed, 263 insertions(+)

diff --git a/btf_encoder.c b/btf_encoder.c
index df16ba0..2f98f48 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -168,6 +168,27 @@ int btf_encoder__encode()
 	return err;
 }
 
+
+#define HASHADDR__BITS 8
+#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
+#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
+
+static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
+						const uint64_t addr)
+{
+	struct variable *variable;
+	struct hlist_node *pos;
+	uint16_t bucket = hashaddr__fn(addr);
+	const struct hlist_head *head = &hashtable[bucket];
+
+	hlist_for_each_entry(variable, pos, head, tool_hnode) {
+		if (variable->ip.addr == addr)
+			return variable;
+	}
+
+	return NULL;
+}
+
 int cu__encode_btf(struct cu *cu, int verbose)
 {
 	bool add_index_type = false;
@@ -176,6 +197,10 @@ int cu__encode_btf(struct cu *cu, int verbose)
 	struct function *fn;
 	struct tag *pos;
 	int err = 0;
+	struct hlist_head hash_addr[HASHADDR__SIZE];
+	struct variable *var;
+	bool has_global_var = false;
+	GElf_Sym sym;
 
 	if (btfe && strcmp(btfe->filename, cu->filename)) {
 		err = btf_encoder__encode();
@@ -241,6 +266,100 @@ int cu__encode_btf(struct cu *cu, int verbose)
 		}
 	}
 
+
+	if (btfe->percpu_shndx == 0 || !btfe->symtab)
+		goto out;
+
+	if (verbose)
+		printf("search cu '%s' for percpu global variables.\n", cu->name);
+
+	/* cache variables' addresses, preparing for searching in symtab. */
+	for (core_id = 0; core_id < HASHADDR__SIZE; ++core_id)
+		INIT_HLIST_HEAD(&hash_addr[core_id]);
+
+	cu__for_each_variable(cu, core_id, pos) {
+		struct hlist_head *head;
+
+		var = tag__variable(pos);
+		if (var->declaration)
+			continue;
+		/* percpu variables are allocated in global space */
+		if (variable__scope(var) != VSCOPE_GLOBAL)
+			continue;
+		has_global_var = true;
+		head = &hash_addr[hashaddr__fn(var->ip.addr)];
+		hlist_add_head(&var->tool_hnode, head);
+	}
+	if (!has_global_var) {
+		if (verbose)
+			printf("cu has no global variable defined, skip.\n");
+		goto out;
+	}
+
+	/* search within symtab for percpu variables */
+	elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
+		uint32_t linkage, type, size, offset;
+		int32_t btf_var_id, btf_var_secinfo_id;
+		uint64_t addr;
+
+		/* compare a symbol's shndx to determine if it's a percpu variable */
+		if (elf_sym__section(&sym) != btfe->percpu_shndx)
+			continue;
+		if (elf_sym__type(&sym) != STT_OBJECT)
+			continue;
+
+		addr = elf_sym__value(&sym);
+		/*
+		 * Store only those symbols that have allocated space in the percpu section.
+		 * This excludes the following three types of symbols:
+		 *
+		 *  1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
+		 *  2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
+		 *  3. __exitcall(fn), functions which are labeled as exit calls.
+		 *
+		 * In addition, the variables defined using DEFINE_PERCPU_FIRST are
+		 * also not included, which currently includes:
+		 *
+		 *  1. fixed_percpu_data
+		 */
+		if (!addr)
+			continue;
+		var = hashaddr__find_variable(hash_addr, addr);
+		if (var == NULL)
+			continue;
+
+		if (verbose)
+			printf("symbol '%s' of address 0x%lx encoded\n",
+			       elf_sym__name(&sym, btfe->symtab), addr);
+
+		/* add a BTF_KIND_VAR in btfe->types */
+		linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
+		type = var->ip.tag.type + type_id_off;
+		btf_var_id = btf_elf__add_var_type(btfe, type, var->name, linkage);
+		if (btf_var_id < 0) {
+			err = -1;
+			printf("error: failed to encode variable '%s'\n",
+			       variable__name(var, cu));
+			break;
+		}
+
+		/*
+		 * add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
+		 * btfe->types later when we add BTF_VAR_DATASEC.
+		 */
+		size = variable__type_size(var, cu);
+		type = btf_var_id;
+		offset = addr - btfe->percpu_base_addr;
+		btf_var_secinfo_id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo,
+							      type, offset, size);
+		if (btf_var_secinfo_id < 0) {
+			err = -1;
+			printf("error: failed to encode var secinfo '%s'\n",
+			       variable__name(var, cu));
+			break;
+		}
+	}
+
 out:
 	if (err)
 		btf_elf__delete(btfe);
diff --git a/dwarves.c b/dwarves.c
index eb7885f..141f688 100644
--- a/dwarves.c
+++ b/dwarves.c
@@ -978,6 +978,12 @@ const char *variable__type_name(const struct variable *var,
 	return tag != NULL ? tag__name(tag, cu, bf, len, NULL) : NULL;
 }
 
+uint32_t variable__type_size(const struct variable *var, const struct cu *cu)
+{
+	const struct tag *tag = cu__type(cu, var->ip.tag.type);
+	return tag != NULL ? tag__size(tag, cu) : 0;
+}
+
 void class_member__delete(struct class_member *member, struct cu *cu)
 {
 	obstack_free(&cu->obstack, member);
diff --git a/dwarves.h b/dwarves.h
index a772e57..e4b0255 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -672,6 +672,8 @@ const char *variable__name(const struct variable *var, const struct cu *cu);
 const char *variable__type_name(const struct variable *var,
 				const struct cu *cu, char *bf, size_t len);
 
+uint32_t variable__type_size(const struct variable *var, const struct cu *cu);
+
 struct lexblock {
 	struct ip_tag	 ip;
 	struct list_head tags;
diff --git a/libbtf.c b/libbtf.c
index 2fbce40..a7dc5fd 100644
--- a/libbtf.c
+++ b/libbtf.c
@@ -25,6 +25,7 @@
 #include "dutil.h"
 #include "gobuffer.h"
 #include "dwarves.h"
+#include "elf_symtab.h"
 
 #define BTF_INFO_ENCODE(kind, kind_flag, vlen)				\
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -46,8 +47,21 @@ struct btf_array_type {
 	struct btf_array array;
 };
 
+struct btf_var_type {
+	struct btf_type type;
+	struct btf_var var;
+};
+
 uint8_t btf_elf__verbose;
 
+static int btf_var_secinfo_cmp(const void *a, const void *b)
+{
+	const struct btf_var_secinfo *av = a;
+	const struct btf_var_secinfo *bv = b;
+
+	return av->offset - bv->offset;
+}
+
 uint32_t btf_elf__get32(struct btf_elf *btfe, uint32_t *p)
 {
 	uint32_t val = *p;
@@ -137,6 +151,8 @@ out:
 struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
 {
 	struct btf_elf *btfe = zalloc(sizeof(*btfe));
+	GElf_Shdr shdr;
+	Elf_Scn *sec;
 
 	if (!btfe)
 		return NULL;
@@ -193,6 +209,26 @@ struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
 	default:	 btfe->wordsize = 0; break;
 	}
 
+	btfe->symtab = elf_symtab__new(NULL, btfe->elf, &btfe->ehdr);
+	if (!btfe->symtab) {
+		if (btf_elf__verbose)
+			printf("%s: '%s' doesn't have symtab.\n", __func__,
+			       btfe->filename);
+		return btfe;
+	}
+
+	/* find percpu section's shndx */
+	sec = elf_section_by_name(btfe->elf, &btfe->ehdr, &shdr, PERCPU_SECTION,
+				  NULL);
+	if (!sec) {
+		if (btf_elf__verbose)
+			printf("%s: '%s' doesn't have '%s' section\n", __func__,
+			       btfe->filename, PERCPU_SECTION);
+		return btfe;
+	}
+	btfe->percpu_shndx = elf_ndxscn(sec);
+	btfe->percpu_base_addr = shdr.sh_addr;
+
 	return btfe;
 
 errout:
@@ -211,7 +247,10 @@ void btf_elf__delete(struct btf_elf *btfe)
 			elf_end(btfe->elf);
 	}
 
+	elf_symtab__delete(btfe->symtab);
+
 	__gobuffer__delete(&btfe->types);
+	__gobuffer__delete(&btfe->percpu_secinfo);
 	free(btfe->filename);
 	free(btfe->data);
 	free(btfe);
@@ -613,6 +652,86 @@ int32_t btf_elf__add_func_proto(struct btf_elf *btfe, struct ftype *ftype, uint3
 	return type_id;
 }
 
+int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
+			      uint32_t linkage)
+{
+	struct btf_var_type t;
+
+	t.type.name_off = name_off;
+	t.type.info = BTF_INFO_ENCODE(BTF_KIND_VAR, 0, 0);
+	t.type.type = type;
+
+	t.var.linkage = linkage;
+
+	++btfe->type_index;
+	if (gobuffer__add(&btfe->types, &t.type, sizeof(t)) < 0) {
+		btf_elf__log_type(btfe, &t.type, true, true,
+				  "type=%u name=%s Error in adding gobuffer",
+				  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
+		return -1;
+	}
+
+	btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s",
+			  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
+
+	return btfe->type_index;
+}
+
+int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
+				 uint32_t offset, uint32_t size)
+{
+	struct btf_var_secinfo si = {
+		.type = type,
+		.offset = offset,
+		.size = size,
+	};
+	return gobuffer__add(buf, &si, sizeof(si));
+}
+
+extern struct strings *strings;
+
+int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
+				  struct gobuffer *var_secinfo_buf)
+{
+	struct btf_type type;
+	size_t sz = gobuffer__size(var_secinfo_buf);
+	uint16_t nr_var_secinfo = sz / sizeof(struct btf_var_secinfo);
+	uint32_t name_off;
+
+	/*
+	 * dwarves doesn't store section names in its string table,
+	 * so we have to add it by ourselves.
+	 */
+	name_off = strings__add(strings, section_name);
+
+	type.name_off = name_off;
+	type.info = BTF_INFO_ENCODE(BTF_KIND_DATASEC, 0, nr_var_secinfo);
+	type.size = 0;
+
+	++btfe->type_index;
+	if (gobuffer__add(&btfe->types, &type, sizeof(type)) < 0) {
+		btf_elf__log_type(btfe, &type, true, true,
+				  "name=%s vlen=%u Error in adding datasec",
+				  btf_elf__name_in_gobuf(btfe, type.name_off),
+				  nr_var_secinfo);
+		return -1;
+	}
+	qsort(var_secinfo_buf->entries, nr_var_secinfo,
+	      sizeof(struct btf_var_secinfo), btf_var_secinfo_cmp);
+	if (gobuffer__add(&btfe->types, var_secinfo_buf->entries, sz) < 0) {
+		btf_elf__log_type(btfe, &type, true, true,
+				  "name=%s vlen=%u Error in adding var_secinfo",
+				  btf_elf__name_in_gobuf(btfe, type.name_off),
+				  nr_var_secinfo);
+		return -1;
+	}
+
+	btf_elf__log_type(btfe, &type, false, false, "type=datasec name=%s",
+			  btf_elf__name_in_gobuf(btfe, type.name_off));
+
+	return btfe->type_index;
+}
+
 static int btf_elf__write(const char *filename, struct btf *btf)
 {
 	GElf_Shdr shdr_mem, *shdr;
@@ -727,6 +846,10 @@ int btf_elf__encode(struct btf_elf *btfe, uint8_t flags)
 	if (gobuffer__size(&btfe->types) == 0)
 		return 0;
 
+	if (gobuffer__size(&btfe->percpu_secinfo) != 0)
+		btf_elf__add_datasec_type(btfe, PERCPU_SECTION,
+					  &btfe->percpu_secinfo);
+
 	btfe->size = sizeof(*hdr) + (gobuffer__size(&btfe->types) + gobuffer__size(btfe->strings));
 	btfe->data = zalloc(btfe->size);
 
diff --git a/libbtf.h b/libbtf.h
index f3c8500..be06480 100644
--- a/libbtf.h
+++ b/libbtf.h
@@ -20,8 +20,10 @@ struct btf_elf {
 	void		  *priv;
 	Elf		  *elf;
 	GElf_Ehdr	  ehdr;
+	struct elf_symtab *symtab;
 	struct gobuffer	  types;
 	struct gobuffer   *strings;
+	struct gobuffer   percpu_secinfo;
 	char		  *filename;
 	size_t		  size;
 	int		  swapped;
@@ -30,11 +32,15 @@ struct btf_elf {
 	bool		  is_big_endian;
 	bool		  raw_btf; // "/sys/kernel/btf/vmlinux"
 	uint32_t	  type_index;
+	uint32_t	  percpu_shndx;
+	uint64_t	  percpu_base_addr;
 };
 
 extern uint8_t btf_elf__verbose;
 #define btf_elf__verbose_log(fmt, ...) { if (btf_elf__verbose) printf(fmt, __VA_ARGS__); }
 
+#define PERCPU_SECTION ".data..percpu"
+
 struct base_type;
 struct ftype;
 
@@ -55,6 +61,12 @@ int32_t btf_elf__add_enum(struct btf_elf *btf, uint32_t name, uint32_t size,
 int btf_elf__add_enum_val(struct btf_elf *btf, uint32_t name, int32_t value);
 int32_t btf_elf__add_func_proto(struct btf_elf *btf, struct ftype *ftype,
 				uint32_t type_id_off);
+int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
+			      uint32_t linkage);
+int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
+				 uint32_t offset, uint32_t size);
+int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
+				  struct gobuffer *var_secinfo_buf);
 void btf_elf__set_strings(struct btf_elf *btf, struct gobuffer *strings);
 int  btf_elf__encode(struct btf_elf *btf, uint8_t flags);
 
diff --git a/pahole.c b/pahole.c
index e2a081b..8407db9 100644
--- a/pahole.c
+++ b/pahole.c
@@ -1084,6 +1084,7 @@ static error_t pahole__options_parser(int key, char *arg,
 	case 'i': find_containers = 1;
 		  class_name = arg;			break;
 	case 'J': btf_encode = 1;
+		  conf_load.get_addr_info = true;
 		  no_bitfield_type_recode = true;	break;
 	case 'l': conf.show_first_biggest_size_base_type_member = 1;	break;
 	case 'M': conf.show_only_data_members = 1;	break;
-- 
2.27.0.278.ge193c7cf3a9-goog

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
       [not found] ` <20200608173403.151706-1-haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
@ 2020-06-09 14:29   ` Arnaldo Carvalho de Melo
       [not found]     ` <20200609142940.GA24868-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-06-09 14:29 UTC (permalink / raw)
  To: Hao Luo
  Cc: Andrii Nakryiko, Alexei Starovoitov,
	daniel-FeC+5ew28dpmcu3hnIyYJQ, olegrom-hpIqsD4AKlfQT0dZR+AlfA,
	kafai-b10kYP2dOMg, dwarves-u79uwXL29TY76Z2rM5mHXA

Em Mon, Jun 08, 2020 at 10:34:03AM -0700, Hao Luo escreveu:
> On SMP systems, the global percpu variables are placed in a special
> '.data..percpu' section, which is stored in a segment whose initial
> address is set to 0, the addresses of per-CPU variables are relative
> positive addresses [1].
> 
> This patch extracts these variables from vmlinux and places them with
> their type information in BTF. More specifically, when BTF is encoded,
> we find the index of the '.data..percpu' section and then traverse
> the symbol table to find those global objects which are in this section.
> For each of these objects, we push a BTF_KIND_VAR into the types buffer,
> and a BTF_VAR_SECINFO into another buffer, percpu_secinfo. When all the
> CUs have finished processing, we push a BTF_KIND_DATASEC into the
> btfe->types buffer, followed by the percpu_secinfo's content.
> 
> In a v5.7-rc7 linux kernel, I was able to extract 291 such variables.
> The build time overhead is small and the space overhead is also small.

Looks good, I'm doing some testing on it now, Andrii, can you provide an
Acked-by or Reviewed-by?

Thanks,

- Arnaldo
 
> Testing:
> 
> Before:
>  $ readelf -SW vmlinux | grep BTF
>  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d2bf8 00   A  0   0  1
> 
> After:
>  $ pahole -J vmlinux
>  $ readelf -SW vmlinux  | grep BTF
>  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d5bca 00   A  0   0  1
> 
> Common percpu vars can be found in the BTF section.
> 
>  $ bpftool btf dump file vmlinux | grep runqueues
>  [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc
> 
>  $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
>  [17592] STRUCT 'cpu_stopper' size=72 vlen=5
>  [17612] VAR 'cpu_stopper' type_id=17592, linkage=static
> 
>  $ bpftool btf dump file vmlinux | grep ' DATASEC '
>  [63652] DATASEC '.data..percpu' size=0 vlen=294
> 
> References:
>  [1] https://lwn.net/Articles/531148/
> Signed-off-by: Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> ---
> Changelog since v2:
> - Move finding percpu_shndx and extracting symtab into btfe creation,
>   so we don't have to allocate a new symtab for each CU.
> - More debug msg by logging the vars encoded in 'verbose' mode. We
>   probably don't want to log the symbols that are _not_ encoded,
>   since that would be too verbose.
> - Calculate var offsets using 'addr - shdr.sh_addr', so it could be
>   generalized to other sections in future.
> - Filter out the symbols that are not STT_OBJECT.
> - Sort var_secinfos in the DATASEC by their offsets.
> - Free 'persec_secinfo' buffer and 'symtab' in btfe deletion.
> - Replace the string ".data..percpu" with a constant PERCPU_SECTION.
> 
> Changelog since v1:
> - Add a ".data..percpu" DATASEC that encodes the found VARs.
> - Use percpu section's shndx to find the symbols that are percpu variables.
> - Use the correct type to set VAR's linkage.
> 
>  btf_encoder.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
>  dwarves.c     |   6 +++
>  dwarves.h     |   2 +
>  libbtf.c      | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  libbtf.h      |  12 +++++
>  pahole.c      |   1 +
>  6 files changed, 263 insertions(+)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index df16ba0..2f98f48 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -168,6 +168,27 @@ int btf_encoder__encode()
>  	return err;
>  }
>  
> +
> +#define HASHADDR__BITS 8
> +#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
> +#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
> +
> +static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
> +						const uint64_t addr)
> +{
> +	struct variable *variable;
> +	struct hlist_node *pos;
> +	uint16_t bucket = hashaddr__fn(addr);
> +	const struct hlist_head *head = &hashtable[bucket];
> +
> +	hlist_for_each_entry(variable, pos, head, tool_hnode) {
> +		if (variable->ip.addr == addr)
> +			return variable;
> +	}
> +
> +	return NULL;
> +}
> +
>  int cu__encode_btf(struct cu *cu, int verbose)
>  {
>  	bool add_index_type = false;
> @@ -176,6 +197,10 @@ int cu__encode_btf(struct cu *cu, int verbose)
>  	struct function *fn;
>  	struct tag *pos;
>  	int err = 0;
> +	struct hlist_head hash_addr[HASHADDR__SIZE];
> +	struct variable *var;
> +	bool has_global_var = false;
> +	GElf_Sym sym;
>  
>  	if (btfe && strcmp(btfe->filename, cu->filename)) {
>  		err = btf_encoder__encode();
> @@ -241,6 +266,100 @@ int cu__encode_btf(struct cu *cu, int verbose)
>  		}
>  	}
>  
> +
> +	if (btfe->percpu_shndx == 0 || !btfe->symtab)
> +		goto out;
> +
> +	if (verbose)
> +		printf("search cu '%s' for percpu global variables.\n", cu->name);
> +
> +	/* cache variables' addresses, preparing for searching in symtab. */
> +	for (core_id = 0; core_id < HASHADDR__SIZE; ++core_id)
> +		INIT_HLIST_HEAD(&hash_addr[core_id]);
> +
> +	cu__for_each_variable(cu, core_id, pos) {
> +		struct hlist_head *head;
> +
> +		var = tag__variable(pos);
> +		if (var->declaration)
> +			continue;
> +		/* percpu variables are allocated in global space */
> +		if (variable__scope(var) != VSCOPE_GLOBAL)
> +			continue;
> +		has_global_var = true;
> +		head = &hash_addr[hashaddr__fn(var->ip.addr)];
> +		hlist_add_head(&var->tool_hnode, head);
> +	}
> +	if (!has_global_var) {
> +		if (verbose)
> +			printf("cu has no global variable defined, skip.\n");
> +		goto out;
> +	}
> +
> +	/* search within symtab for percpu variables */
> +	elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
> +		uint32_t linkage, type, size, offset;
> +		int32_t btf_var_id, btf_var_secinfo_id;
> +		uint64_t addr;
> +
> +		/* compare a symbol's shndx to determine if it's a percpu variable */
> +		if (elf_sym__section(&sym) != btfe->percpu_shndx)
> +			continue;
> +		if (elf_sym__type(&sym) != STT_OBJECT)
> +			continue;
> +
> +		addr = elf_sym__value(&sym);
> +		/*
> +		 * Store only those symbols that have allocated space in the percpu section.
> +		 * This excludes the following three types of symbols:
> +		 *
> +		 *  1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
> +		 *  2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
> +		 *  3. __exitcall(fn), functions which are labeled as exit calls.
> +		 *
> +		 * In addition, the variables defined using DEFINE_PERCPU_FIRST are
> +		 * also not included, which currently includes:
> +		 *
> +		 *  1. fixed_percpu_data
> +		 */
> +		if (!addr)
> +			continue;
> +		var = hashaddr__find_variable(hash_addr, addr);
> +		if (var == NULL)
> +			continue;
> +
> +		if (verbose)
> +			printf("symbol '%s' of address 0x%lx encoded\n",
> +			       elf_sym__name(&sym, btfe->symtab), addr);
> +
> +		/* add a BTF_KIND_VAR in btfe->types */
> +		linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
> +		type = var->ip.tag.type + type_id_off;
> +		btf_var_id = btf_elf__add_var_type(btfe, type, var->name, linkage);
> +		if (btf_var_id < 0) {
> +			err = -1;
> +			printf("error: failed to encode variable '%s'\n",
> +			       variable__name(var, cu));
> +			break;
> +		}
> +
> +		/*
> +		 * add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
> +		 * btfe->types later when we add BTF_VAR_DATASEC.
> +		 */
> +		size = variable__type_size(var, cu);
> +		type = btf_var_id;
> +		offset = addr - btfe->percpu_base_addr;
> +		btf_var_secinfo_id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo,
> +							      type, offset, size);
> +		if (btf_var_secinfo_id < 0) {
> +			err = -1;
> +			printf("error: failed to encode var secinfo '%s'\n",
> +			       variable__name(var, cu));
> +			break;
> +		}
> +	}
> +
>  out:
>  	if (err)
>  		btf_elf__delete(btfe);
> diff --git a/dwarves.c b/dwarves.c
> index eb7885f..141f688 100644
> --- a/dwarves.c
> +++ b/dwarves.c
> @@ -978,6 +978,12 @@ const char *variable__type_name(const struct variable *var,
>  	return tag != NULL ? tag__name(tag, cu, bf, len, NULL) : NULL;
>  }
>  
> +uint32_t variable__type_size(const struct variable *var, const struct cu *cu)
> +{
> +	const struct tag *tag = cu__type(cu, var->ip.tag.type);
> +	return tag != NULL ? tag__size(tag, cu) : 0;
> +}
> +
>  void class_member__delete(struct class_member *member, struct cu *cu)
>  {
>  	obstack_free(&cu->obstack, member);
> diff --git a/dwarves.h b/dwarves.h
> index a772e57..e4b0255 100644
> --- a/dwarves.h
> +++ b/dwarves.h
> @@ -672,6 +672,8 @@ const char *variable__name(const struct variable *var, const struct cu *cu);
>  const char *variable__type_name(const struct variable *var,
>  				const struct cu *cu, char *bf, size_t len);
>  
> +uint32_t variable__type_size(const struct variable *var, const struct cu *cu);
> +
>  struct lexblock {
>  	struct ip_tag	 ip;
>  	struct list_head tags;
> diff --git a/libbtf.c b/libbtf.c
> index 2fbce40..a7dc5fd 100644
> --- a/libbtf.c
> +++ b/libbtf.c
> @@ -25,6 +25,7 @@
>  #include "dutil.h"
>  #include "gobuffer.h"
>  #include "dwarves.h"
> +#include "elf_symtab.h"
>  
>  #define BTF_INFO_ENCODE(kind, kind_flag, vlen)				\
>  	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
> @@ -46,8 +47,21 @@ struct btf_array_type {
>  	struct btf_array array;
>  };
>  
> +struct btf_var_type {
> +	struct btf_type type;
> +	struct btf_var var;
> +};
> +
>  uint8_t btf_elf__verbose;
>  
> +static int btf_var_secinfo_cmp(const void *a, const void *b)
> +{
> +	const struct btf_var_secinfo *av = a;
> +	const struct btf_var_secinfo *bv = b;
> +
> +	return av->offset - bv->offset;
> +}
> +
>  uint32_t btf_elf__get32(struct btf_elf *btfe, uint32_t *p)
>  {
>  	uint32_t val = *p;
> @@ -137,6 +151,8 @@ out:
>  struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
>  {
>  	struct btf_elf *btfe = zalloc(sizeof(*btfe));
> +	GElf_Shdr shdr;
> +	Elf_Scn *sec;
>  
>  	if (!btfe)
>  		return NULL;
> @@ -193,6 +209,26 @@ struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
>  	default:	 btfe->wordsize = 0; break;
>  	}
>  
> +	btfe->symtab = elf_symtab__new(NULL, btfe->elf, &btfe->ehdr);
> +	if (!btfe->symtab) {
> +		if (btf_elf__verbose)
> +			printf("%s: '%s' doesn't have symtab.\n", __func__,
> +			       btfe->filename);
> +		return btfe;
> +	}
> +
> +	/* find percpu section's shndx */
> +	sec = elf_section_by_name(btfe->elf, &btfe->ehdr, &shdr, PERCPU_SECTION,
> +				  NULL);
> +	if (!sec) {
> +		if (btf_elf__verbose)
> +			printf("%s: '%s' doesn't have '%s' section\n", __func__,
> +			       btfe->filename, PERCPU_SECTION);
> +		return btfe;
> +	}
> +	btfe->percpu_shndx = elf_ndxscn(sec);
> +	btfe->percpu_base_addr = shdr.sh_addr;
> +
>  	return btfe;
>  
>  errout:
> @@ -211,7 +247,10 @@ void btf_elf__delete(struct btf_elf *btfe)
>  			elf_end(btfe->elf);
>  	}
>  
> +	elf_symtab__delete(btfe->symtab);
> +
>  	__gobuffer__delete(&btfe->types);
> +	__gobuffer__delete(&btfe->percpu_secinfo);
>  	free(btfe->filename);
>  	free(btfe->data);
>  	free(btfe);
> @@ -613,6 +652,86 @@ int32_t btf_elf__add_func_proto(struct btf_elf *btfe, struct ftype *ftype, uint3
>  	return type_id;
>  }
>  
> +int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
> +			      uint32_t linkage)
> +{
> +	struct btf_var_type t;
> +
> +	t.type.name_off = name_off;
> +	t.type.info = BTF_INFO_ENCODE(BTF_KIND_VAR, 0, 0);
> +	t.type.type = type;
> +
> +	t.var.linkage = linkage;
> +
> +	++btfe->type_index;
> +	if (gobuffer__add(&btfe->types, &t.type, sizeof(t)) < 0) {
> +		btf_elf__log_type(btfe, &t.type, true, true,
> +				  "type=%u name=%s Error in adding gobuffer",
> +				  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
> +		return -1;
> +	}
> +
> +	btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s",
> +			  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
> +
> +	return btfe->type_index;
> +}
> +
> +int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
> +				 uint32_t offset, uint32_t size)
> +{
> +	struct btf_var_secinfo si = {
> +		.type = type,
> +		.offset = offset,
> +		.size = size,
> +	};
> +	return gobuffer__add(buf, &si, sizeof(si));
> +}
> +
> +extern struct strings *strings;
> +
> +int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
> +				  struct gobuffer *var_secinfo_buf)
> +{
> +	struct btf_type type;
> +	size_t sz = gobuffer__size(var_secinfo_buf);
> +	uint16_t nr_var_secinfo = sz / sizeof(struct btf_var_secinfo);
> +	uint32_t name_off;
> +
> +	/*
> +	 * dwarves doesn't store section names in its string table,
> +	 * so we have to add it by ourselves.
> +	 */
> +	name_off = strings__add(strings, section_name);
> +
> +	type.name_off = name_off;
> +	type.info = BTF_INFO_ENCODE(BTF_KIND_DATASEC, 0, nr_var_secinfo);
> +	type.size = 0;
> +
> +	++btfe->type_index;
> +	if (gobuffer__add(&btfe->types, &type, sizeof(type)) < 0) {
> +		btf_elf__log_type(btfe, &type, true, true,
> +				  "name=%s vlen=%u Error in adding datasec",
> +				  btf_elf__name_in_gobuf(btfe, type.name_off),
> +				  nr_var_secinfo);
> +		return -1;
> +	}
> +	qsort(var_secinfo_buf->entries, nr_var_secinfo,
> +	      sizeof(struct btf_var_secinfo), btf_var_secinfo_cmp);
> +	if (gobuffer__add(&btfe->types, var_secinfo_buf->entries, sz) < 0) {
> +		btf_elf__log_type(btfe, &type, true, true,
> +				  "name=%s vlen=%u Error in adding var_secinfo",
> +				  btf_elf__name_in_gobuf(btfe, type.name_off),
> +				  nr_var_secinfo);
> +		return -1;
> +	}
> +
> +	btf_elf__log_type(btfe, &type, false, false, "type=datasec name=%s",
> +			  btf_elf__name_in_gobuf(btfe, type.name_off));
> +
> +	return btfe->type_index;
> +}
> +
>  static int btf_elf__write(const char *filename, struct btf *btf)
>  {
>  	GElf_Shdr shdr_mem, *shdr;
> @@ -727,6 +846,10 @@ int btf_elf__encode(struct btf_elf *btfe, uint8_t flags)
>  	if (gobuffer__size(&btfe->types) == 0)
>  		return 0;
>  
> +	if (gobuffer__size(&btfe->percpu_secinfo) != 0)
> +		btf_elf__add_datasec_type(btfe, PERCPU_SECTION,
> +					  &btfe->percpu_secinfo);
> +
>  	btfe->size = sizeof(*hdr) + (gobuffer__size(&btfe->types) + gobuffer__size(btfe->strings));
>  	btfe->data = zalloc(btfe->size);
>  
> diff --git a/libbtf.h b/libbtf.h
> index f3c8500..be06480 100644
> --- a/libbtf.h
> +++ b/libbtf.h
> @@ -20,8 +20,10 @@ struct btf_elf {
>  	void		  *priv;
>  	Elf		  *elf;
>  	GElf_Ehdr	  ehdr;
> +	struct elf_symtab *symtab;
>  	struct gobuffer	  types;
>  	struct gobuffer   *strings;
> +	struct gobuffer   percpu_secinfo;
>  	char		  *filename;
>  	size_t		  size;
>  	int		  swapped;
> @@ -30,11 +32,15 @@ struct btf_elf {
>  	bool		  is_big_endian;
>  	bool		  raw_btf; // "/sys/kernel/btf/vmlinux"
>  	uint32_t	  type_index;
> +	uint32_t	  percpu_shndx;
> +	uint64_t	  percpu_base_addr;
>  };
>  
>  extern uint8_t btf_elf__verbose;
>  #define btf_elf__verbose_log(fmt, ...) { if (btf_elf__verbose) printf(fmt, __VA_ARGS__); }
>  
> +#define PERCPU_SECTION ".data..percpu"
> +
>  struct base_type;
>  struct ftype;
>  
> @@ -55,6 +61,12 @@ int32_t btf_elf__add_enum(struct btf_elf *btf, uint32_t name, uint32_t size,
>  int btf_elf__add_enum_val(struct btf_elf *btf, uint32_t name, int32_t value);
>  int32_t btf_elf__add_func_proto(struct btf_elf *btf, struct ftype *ftype,
>  				uint32_t type_id_off);
> +int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
> +			      uint32_t linkage);
> +int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
> +				 uint32_t offset, uint32_t size);
> +int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
> +				  struct gobuffer *var_secinfo_buf);
>  void btf_elf__set_strings(struct btf_elf *btf, struct gobuffer *strings);
>  int  btf_elf__encode(struct btf_elf *btf, uint8_t flags);
>  
> diff --git a/pahole.c b/pahole.c
> index e2a081b..8407db9 100644
> --- a/pahole.c
> +++ b/pahole.c
> @@ -1084,6 +1084,7 @@ static error_t pahole__options_parser(int key, char *arg,
>  	case 'i': find_containers = 1;
>  		  class_name = arg;			break;
>  	case 'J': btf_encode = 1;
> +		  conf_load.get_addr_info = true;
>  		  no_bitfield_type_recode = true;	break;
>  	case 'l': conf.show_first_biggest_size_base_type_member = 1;	break;
>  	case 'M': conf.show_only_data_members = 1;	break;
> -- 
> 2.27.0.278.ge193c7cf3a9-goog
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
       [not found]     ` <20200609142940.GA24868-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2020-06-09 14:58       ` Arnaldo Carvalho de Melo
       [not found]         ` <CA+khW7j_bBNNepxyk4pZQLMS3CxA4CKQ-9cSSub-hTDQW5xZVQ@mail.gmail.com>
  0 siblings, 1 reply; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-06-09 14:58 UTC (permalink / raw)
  To: Hao Luo
  Cc: Andrii Nakryiko, Alexei Starovoitov,
	daniel-FeC+5ew28dpmcu3hnIyYJQ, olegrom-hpIqsD4AKlfQT0dZR+AlfA,
	kafai-b10kYP2dOMg, dwarves-u79uwXL29TY76Z2rM5mHXA

Em Tue, Jun 09, 2020 at 11:29:40AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Jun 08, 2020 at 10:34:03AM -0700, Hao Luo escreveu:
> > On SMP systems, the global percpu variables are placed in a special
> > '.data..percpu' section, which is stored in a segment whose initial
> > address is set to 0, the addresses of per-CPU variables are relative
> > positive addresses [1].
> > 
> > This patch extracts these variables from vmlinux and places them with
> > their type information in BTF. More specifically, when BTF is encoded,
> > we find the index of the '.data..percpu' section and then traverse
> > the symbol table to find those global objects which are in this section.
> > For each of these objects, we push a BTF_KIND_VAR into the types buffer,
> > and a BTF_VAR_SECINFO into another buffer, percpu_secinfo. When all the
> > CUs have finished processing, we push a BTF_KIND_DATASEC into the
> > btfe->types buffer, followed by the percpu_secinfo's content.
> > 
> > In a v5.7-rc7 linux kernel, I was able to extract 291 such variables.
> > The build time overhead is small and the space overhead is also small.
> 
> Looks good, I'm doing some testing on it now, Andrii, can you provide an
> Acked-by or Reviewed-by?

So, I see these (anon) variables, what are these? for an 5.7 vmlinux:

[acme@five pahole]$ bpftool btf dump file vmlinux | grep -w VAR | tail
[67381] VAR '(anon)' type_id=67175, linkage=static
[67495] VAR 'rt_cache_stat' type_id=67417, linkage=static
[67496] VAR 'rt_uncached_list' type_id=67416, linkage=static
[67857] VAR 'tcp_md5sig_pool' type_id=67788, linkage=static
[67993] VAR 'tsq_tasklet' type_id=67927, linkage=static
[69524] VAR 'xfrm_trans_tasklet' type_id=69502, linkage=static
[70055] VAR 'rt6_uncached_list' type_id=67416, linkage=static
[70609] VAR '(anon)' type_id=1713, linkage=static
[70634] VAR 'hmac_ring' type_id=1909, linkage=static
[71591] VAR 'xskmap_flush_list' type_id=85, linkage=static
[acme@five pahole]$

[acme@five pahole]$ bpftool btf dump file vmlinux | grep -w 1713
[1713] FUNC 'memset' type_id=1712
[7235] VAR '(anon)' type_id=1713, linkage=static
[7236] VAR '(anon)' type_id=1713, linkage=static
[8832] VAR '(anon)' type_id=1713, linkage=static
[14346] VAR '(anon)' type_id=1713, linkage=static
[14347] VAR '(anon)' type_id=1713, linkage=static
[14348] VAR '(anon)' type_id=1713, linkage=static
[14349] VAR '(anon)' type_id=1713, linkage=static
[14350] VAR '(anon)' type_id=1713, linkage=static
[14351] VAR '(anon)' type_id=1713, linkage=static
[14352] VAR '(anon)' type_id=1713, linkage=static
[18903] VAR '(anon)' type_id=1713, linkage=static
[18904] VAR '(anon)' type_id=1713, linkage=static
[23180] VAR '(anon)' type_id=1713, linkage=static
[44605] VAR '(anon)' type_id=1713, linkage=static
[60638] VAR '(anon)' type_id=1713, linkage=static
[60639] VAR '(anon)' type_id=1713, linkage=static
[63869] VAR '(anon)' type_id=1713, linkage=static
[70609] VAR '(anon)' type_id=1713, linkage=static
[acme@five pahole]$

[acme@five pahole]$ bpftool btf dump file vmlinux | grep -m1 -w 1713 -B9
[1710] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
	'p' type_id=96
	'q' type_id=97
	'size' type_id=49
[1711] FUNC 'memcpy' type_id=1710
[1712] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
	'p' type_id=96
	'c' type_id=22
	'size' type_id=49
[1713] FUNC 'memset' type_id=1712
[acme@five pahole]$

[acme@five pahole]$ bpftool btf dump file vmlinux | grep -m10 -w 7235 -B5 -A5
	'prec' type_id=22
[7232] FUNC 'arch_show_interrupts' type_id=7231
[7233] FUNC_PROTO '(anon)' ret_type_id=0 vlen=1
	'irq' type_id=10
[7234] FUNC 'ack_bad_irq' type_id=7233
[7235] VAR '(anon)' type_id=1713, linkage=static
[7236] VAR '(anon)' type_id=1713, linkage=static
[7237] ARRAY '(anon)' type_id=233 index_type_id=1 nr_elems=4
[7238] FUNC 'irq_init_percpu_irqstack' type_id=6732
[7239] VAR 'irq_stack_backing_store' type_id=412, linkage=global-alloc
[7240] STRUCT 'estack_pages' size=8 vlen=3
--
	type_id=6739 offset=98096 size=16
	type_id=6737 offset=98112 size=16
	type_id=6736 offset=98128 size=16
	type_id=6764 offset=98144 size=16
	type_id=6763 offset=98160 size=16
	type_id=7235 offset=98176 size=0
	type_id=7330 offset=98192 size=4
	type_id=7329 offset=98200 size=8
	type_id=7328 offset=98208 size=4
	type_id=7325 offset=98216 size=8
	type_id=7327 offset=98224 size=1
[acme@five pahole]$

[acme@five pahole]$ readelf  -wi vmlinux  | grep -m 2 DW_AT_producer
    <1c>   DW_AT_producer    : (indirect string, offset: 0x49): GNU AS 2.32
    <2e>   DW_AT_producer    : (indirect string, offset: 0x1358): GNU C89 9.3.1 20200408 (Red Hat 9.3.1-2) -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount -mfentry -march=x86-64 -g -O2 -std=gnu90 -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1 -fno-asynchronous-unwind-tables -fno-jump-tables -fno-delete-null-pointer-checks -fstack-protector-strong -fno-var-tracking-assignments -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fstack-check=no -fconserve-stack -fcf-protection=none --param allow-store-data-races=0
[acme@five pahole]$
 
> Thanks,
> 
> - Arnaldo
>  
> > Testing:
> > 
> > Before:
> >  $ readelf -SW vmlinux | grep BTF
> >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d2bf8 00   A  0   0  1
> > 
> > After:
> >  $ pahole -J vmlinux
> >  $ readelf -SW vmlinux  | grep BTF
> >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d5bca 00   A  0   0  1
> > 
> > Common percpu vars can be found in the BTF section.
> > 
> >  $ bpftool btf dump file vmlinux | grep runqueues
> >  [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc
> > 
> >  $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
> >  [17592] STRUCT 'cpu_stopper' size=72 vlen=5
> >  [17612] VAR 'cpu_stopper' type_id=17592, linkage=static
> > 
> >  $ bpftool btf dump file vmlinux | grep ' DATASEC '
> >  [63652] DATASEC '.data..percpu' size=0 vlen=294
> > 
> > References:
> >  [1] https://lwn.net/Articles/531148/
> > Signed-off-by: Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> > ---
> > Changelog since v2:
> > - Move finding percpu_shndx and extracting symtab into btfe creation,
> >   so we don't have to allocate a new symtab for each CU.
> > - More debug msg by logging the vars encoded in 'verbose' mode. We
> >   probably don't want to log the symbols that are _not_ encoded,
> >   since that would be too verbose.
> > - Calculate var offsets using 'addr - shdr.sh_addr', so it could be
> >   generalized to other sections in future.
> > - Filter out the symbols that are not STT_OBJECT.
> > - Sort var_secinfos in the DATASEC by their offsets.
> > - Free 'persec_secinfo' buffer and 'symtab' in btfe deletion.
> > - Replace the string ".data..percpu" with a constant PERCPU_SECTION.
> > 
> > Changelog since v1:
> > - Add a ".data..percpu" DATASEC that encodes the found VARs.
> > - Use percpu section's shndx to find the symbols that are percpu variables.
> > - Use the correct type to set VAR's linkage.
> > 
> >  btf_encoder.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  dwarves.c     |   6 +++
> >  dwarves.h     |   2 +
> >  libbtf.c      | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  libbtf.h      |  12 +++++
> >  pahole.c      |   1 +
> >  6 files changed, 263 insertions(+)
> > 
> > diff --git a/btf_encoder.c b/btf_encoder.c
> > index df16ba0..2f98f48 100644
> > --- a/btf_encoder.c
> > +++ b/btf_encoder.c
> > @@ -168,6 +168,27 @@ int btf_encoder__encode()
> >  	return err;
> >  }
> >  
> > +
> > +#define HASHADDR__BITS 8
> > +#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
> > +#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
> > +
> > +static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
> > +						const uint64_t addr)
> > +{
> > +	struct variable *variable;
> > +	struct hlist_node *pos;
> > +	uint16_t bucket = hashaddr__fn(addr);
> > +	const struct hlist_head *head = &hashtable[bucket];
> > +
> > +	hlist_for_each_entry(variable, pos, head, tool_hnode) {
> > +		if (variable->ip.addr == addr)
> > +			return variable;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> >  int cu__encode_btf(struct cu *cu, int verbose)
> >  {
> >  	bool add_index_type = false;
> > @@ -176,6 +197,10 @@ int cu__encode_btf(struct cu *cu, int verbose)
> >  	struct function *fn;
> >  	struct tag *pos;
> >  	int err = 0;
> > +	struct hlist_head hash_addr[HASHADDR__SIZE];
> > +	struct variable *var;
> > +	bool has_global_var = false;
> > +	GElf_Sym sym;
> >  
> >  	if (btfe && strcmp(btfe->filename, cu->filename)) {
> >  		err = btf_encoder__encode();
> > @@ -241,6 +266,100 @@ int cu__encode_btf(struct cu *cu, int verbose)
> >  		}
> >  	}
> >  
> > +
> > +	if (btfe->percpu_shndx == 0 || !btfe->symtab)
> > +		goto out;
> > +
> > +	if (verbose)
> > +		printf("search cu '%s' for percpu global variables.\n", cu->name);
> > +
> > +	/* cache variables' addresses, preparing for searching in symtab. */
> > +	for (core_id = 0; core_id < HASHADDR__SIZE; ++core_id)
> > +		INIT_HLIST_HEAD(&hash_addr[core_id]);
> > +
> > +	cu__for_each_variable(cu, core_id, pos) {
> > +		struct hlist_head *head;
> > +
> > +		var = tag__variable(pos);
> > +		if (var->declaration)
> > +			continue;
> > +		/* percpu variables are allocated in global space */
> > +		if (variable__scope(var) != VSCOPE_GLOBAL)
> > +			continue;
> > +		has_global_var = true;
> > +		head = &hash_addr[hashaddr__fn(var->ip.addr)];
> > +		hlist_add_head(&var->tool_hnode, head);
> > +	}
> > +	if (!has_global_var) {
> > +		if (verbose)
> > +			printf("cu has no global variable defined, skip.\n");
> > +		goto out;
> > +	}
> > +
> > +	/* search within symtab for percpu variables */
> > +	elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
> > +		uint32_t linkage, type, size, offset;
> > +		int32_t btf_var_id, btf_var_secinfo_id;
> > +		uint64_t addr;
> > +
> > +		/* compare a symbol's shndx to determine if it's a percpu variable */
> > +		if (elf_sym__section(&sym) != btfe->percpu_shndx)
> > +			continue;
> > +		if (elf_sym__type(&sym) != STT_OBJECT)
> > +			continue;
> > +
> > +		addr = elf_sym__value(&sym);
> > +		/*
> > +		 * Store only those symbols that have allocated space in the percpu section.
> > +		 * This excludes the following three types of symbols:
> > +		 *
> > +		 *  1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
> > +		 *  2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
> > +		 *  3. __exitcall(fn), functions which are labeled as exit calls.
> > +		 *
> > +		 * In addition, the variables defined using DEFINE_PERCPU_FIRST are
> > +		 * also not included, which currently includes:
> > +		 *
> > +		 *  1. fixed_percpu_data
> > +		 */
> > +		if (!addr)
> > +			continue;
> > +		var = hashaddr__find_variable(hash_addr, addr);
> > +		if (var == NULL)
> > +			continue;
> > +
> > +		if (verbose)
> > +			printf("symbol '%s' of address 0x%lx encoded\n",
> > +			       elf_sym__name(&sym, btfe->symtab), addr);
> > +
> > +		/* add a BTF_KIND_VAR in btfe->types */
> > +		linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
> > +		type = var->ip.tag.type + type_id_off;
> > +		btf_var_id = btf_elf__add_var_type(btfe, type, var->name, linkage);
> > +		if (btf_var_id < 0) {
> > +			err = -1;
> > +			printf("error: failed to encode variable '%s'\n",
> > +			       variable__name(var, cu));
> > +			break;
> > +		}
> > +
> > +		/*
> > +		 * add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
> > +		 * btfe->types later when we add BTF_VAR_DATASEC.
> > +		 */
> > +		size = variable__type_size(var, cu);
> > +		type = btf_var_id;
> > +		offset = addr - btfe->percpu_base_addr;
> > +		btf_var_secinfo_id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo,
> > +							      type, offset, size);
> > +		if (btf_var_secinfo_id < 0) {
> > +			err = -1;
> > +			printf("error: failed to encode var secinfo '%s'\n",
> > +			       variable__name(var, cu));
> > +			break;
> > +		}
> > +	}
> > +
> >  out:
> >  	if (err)
> >  		btf_elf__delete(btfe);
> > diff --git a/dwarves.c b/dwarves.c
> > index eb7885f..141f688 100644
> > --- a/dwarves.c
> > +++ b/dwarves.c
> > @@ -978,6 +978,12 @@ const char *variable__type_name(const struct variable *var,
> >  	return tag != NULL ? tag__name(tag, cu, bf, len, NULL) : NULL;
> >  }
> >  
> > +uint32_t variable__type_size(const struct variable *var, const struct cu *cu)
> > +{
> > +	const struct tag *tag = cu__type(cu, var->ip.tag.type);
> > +	return tag != NULL ? tag__size(tag, cu) : 0;
> > +}
> > +
> >  void class_member__delete(struct class_member *member, struct cu *cu)
> >  {
> >  	obstack_free(&cu->obstack, member);
> > diff --git a/dwarves.h b/dwarves.h
> > index a772e57..e4b0255 100644
> > --- a/dwarves.h
> > +++ b/dwarves.h
> > @@ -672,6 +672,8 @@ const char *variable__name(const struct variable *var, const struct cu *cu);
> >  const char *variable__type_name(const struct variable *var,
> >  				const struct cu *cu, char *bf, size_t len);
> >  
> > +uint32_t variable__type_size(const struct variable *var, const struct cu *cu);
> > +
> >  struct lexblock {
> >  	struct ip_tag	 ip;
> >  	struct list_head tags;
> > diff --git a/libbtf.c b/libbtf.c
> > index 2fbce40..a7dc5fd 100644
> > --- a/libbtf.c
> > +++ b/libbtf.c
> > @@ -25,6 +25,7 @@
> >  #include "dutil.h"
> >  #include "gobuffer.h"
> >  #include "dwarves.h"
> > +#include "elf_symtab.h"
> >  
> >  #define BTF_INFO_ENCODE(kind, kind_flag, vlen)				\
> >  	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
> > @@ -46,8 +47,21 @@ struct btf_array_type {
> >  	struct btf_array array;
> >  };
> >  
> > +struct btf_var_type {
> > +	struct btf_type type;
> > +	struct btf_var var;
> > +};
> > +
> >  uint8_t btf_elf__verbose;
> >  
> > +static int btf_var_secinfo_cmp(const void *a, const void *b)
> > +{
> > +	const struct btf_var_secinfo *av = a;
> > +	const struct btf_var_secinfo *bv = b;
> > +
> > +	return av->offset - bv->offset;
> > +}
> > +
> >  uint32_t btf_elf__get32(struct btf_elf *btfe, uint32_t *p)
> >  {
> >  	uint32_t val = *p;
> > @@ -137,6 +151,8 @@ out:
> >  struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
> >  {
> >  	struct btf_elf *btfe = zalloc(sizeof(*btfe));
> > +	GElf_Shdr shdr;
> > +	Elf_Scn *sec;
> >  
> >  	if (!btfe)
> >  		return NULL;
> > @@ -193,6 +209,26 @@ struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
> >  	default:	 btfe->wordsize = 0; break;
> >  	}
> >  
> > +	btfe->symtab = elf_symtab__new(NULL, btfe->elf, &btfe->ehdr);
> > +	if (!btfe->symtab) {
> > +		if (btf_elf__verbose)
> > +			printf("%s: '%s' doesn't have symtab.\n", __func__,
> > +			       btfe->filename);
> > +		return btfe;
> > +	}
> > +
> > +	/* find percpu section's shndx */
> > +	sec = elf_section_by_name(btfe->elf, &btfe->ehdr, &shdr, PERCPU_SECTION,
> > +				  NULL);
> > +	if (!sec) {
> > +		if (btf_elf__verbose)
> > +			printf("%s: '%s' doesn't have '%s' section\n", __func__,
> > +			       btfe->filename, PERCPU_SECTION);
> > +		return btfe;
> > +	}
> > +	btfe->percpu_shndx = elf_ndxscn(sec);
> > +	btfe->percpu_base_addr = shdr.sh_addr;
> > +
> >  	return btfe;
> >  
> >  errout:
> > @@ -211,7 +247,10 @@ void btf_elf__delete(struct btf_elf *btfe)
> >  			elf_end(btfe->elf);
> >  	}
> >  
> > +	elf_symtab__delete(btfe->symtab);
> > +
> >  	__gobuffer__delete(&btfe->types);
> > +	__gobuffer__delete(&btfe->percpu_secinfo);
> >  	free(btfe->filename);
> >  	free(btfe->data);
> >  	free(btfe);
> > @@ -613,6 +652,86 @@ int32_t btf_elf__add_func_proto(struct btf_elf *btfe, struct ftype *ftype, uint3
> >  	return type_id;
> >  }
> >  
> > +int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
> > +			      uint32_t linkage)
> > +{
> > +	struct btf_var_type t;
> > +
> > +	t.type.name_off = name_off;
> > +	t.type.info = BTF_INFO_ENCODE(BTF_KIND_VAR, 0, 0);
> > +	t.type.type = type;
> > +
> > +	t.var.linkage = linkage;
> > +
> > +	++btfe->type_index;
> > +	if (gobuffer__add(&btfe->types, &t.type, sizeof(t)) < 0) {
> > +		btf_elf__log_type(btfe, &t.type, true, true,
> > +				  "type=%u name=%s Error in adding gobuffer",
> > +				  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
> > +		return -1;
> > +	}
> > +
> > +	btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s",
> > +			  t.type.type, btf_elf__name_in_gobuf(btfe, t.type.name_off));
> > +
> > +	return btfe->type_index;
> > +}
> > +
> > +int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
> > +				 uint32_t offset, uint32_t size)
> > +{
> > +	struct btf_var_secinfo si = {
> > +		.type = type,
> > +		.offset = offset,
> > +		.size = size,
> > +	};
> > +	return gobuffer__add(buf, &si, sizeof(si));
> > +}
> > +
> > +extern struct strings *strings;
> > +
> > +int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
> > +				  struct gobuffer *var_secinfo_buf)
> > +{
> > +	struct btf_type type;
> > +	size_t sz = gobuffer__size(var_secinfo_buf);
> > +	uint16_t nr_var_secinfo = sz / sizeof(struct btf_var_secinfo);
> > +	uint32_t name_off;
> > +
> > +	/*
> > +	 * dwarves doesn't store section names in its string table,
> > +	 * so we have to add it by ourselves.
> > +	 */
> > +	name_off = strings__add(strings, section_name);
> > +
> > +	type.name_off = name_off;
> > +	type.info = BTF_INFO_ENCODE(BTF_KIND_DATASEC, 0, nr_var_secinfo);
> > +	type.size = 0;
> > +
> > +	++btfe->type_index;
> > +	if (gobuffer__add(&btfe->types, &type, sizeof(type)) < 0) {
> > +		btf_elf__log_type(btfe, &type, true, true,
> > +				  "name=%s vlen=%u Error in adding datasec",
> > +				  btf_elf__name_in_gobuf(btfe, type.name_off),
> > +				  nr_var_secinfo);
> > +		return -1;
> > +	}
> > +	qsort(var_secinfo_buf->entries, nr_var_secinfo,
> > +	      sizeof(struct btf_var_secinfo), btf_var_secinfo_cmp);
> > +	if (gobuffer__add(&btfe->types, var_secinfo_buf->entries, sz) < 0) {
> > +		btf_elf__log_type(btfe, &type, true, true,
> > +				  "name=%s vlen=%u Error in adding var_secinfo",
> > +				  btf_elf__name_in_gobuf(btfe, type.name_off),
> > +				  nr_var_secinfo);
> > +		return -1;
> > +	}
> > +
> > +	btf_elf__log_type(btfe, &type, false, false, "type=datasec name=%s",
> > +			  btf_elf__name_in_gobuf(btfe, type.name_off));
> > +
> > +	return btfe->type_index;
> > +}
> > +
> >  static int btf_elf__write(const char *filename, struct btf *btf)
> >  {
> >  	GElf_Shdr shdr_mem, *shdr;
> > @@ -727,6 +846,10 @@ int btf_elf__encode(struct btf_elf *btfe, uint8_t flags)
> >  	if (gobuffer__size(&btfe->types) == 0)
> >  		return 0;
> >  
> > +	if (gobuffer__size(&btfe->percpu_secinfo) != 0)
> > +		btf_elf__add_datasec_type(btfe, PERCPU_SECTION,
> > +					  &btfe->percpu_secinfo);
> > +
> >  	btfe->size = sizeof(*hdr) + (gobuffer__size(&btfe->types) + gobuffer__size(btfe->strings));
> >  	btfe->data = zalloc(btfe->size);
> >  
> > diff --git a/libbtf.h b/libbtf.h
> > index f3c8500..be06480 100644
> > --- a/libbtf.h
> > +++ b/libbtf.h
> > @@ -20,8 +20,10 @@ struct btf_elf {
> >  	void		  *priv;
> >  	Elf		  *elf;
> >  	GElf_Ehdr	  ehdr;
> > +	struct elf_symtab *symtab;
> >  	struct gobuffer	  types;
> >  	struct gobuffer   *strings;
> > +	struct gobuffer   percpu_secinfo;
> >  	char		  *filename;
> >  	size_t		  size;
> >  	int		  swapped;
> > @@ -30,11 +32,15 @@ struct btf_elf {
> >  	bool		  is_big_endian;
> >  	bool		  raw_btf; // "/sys/kernel/btf/vmlinux"
> >  	uint32_t	  type_index;
> > +	uint32_t	  percpu_shndx;
> > +	uint64_t	  percpu_base_addr;
> >  };
> >  
> >  extern uint8_t btf_elf__verbose;
> >  #define btf_elf__verbose_log(fmt, ...) { if (btf_elf__verbose) printf(fmt, __VA_ARGS__); }
> >  
> > +#define PERCPU_SECTION ".data..percpu"
> > +
> >  struct base_type;
> >  struct ftype;
> >  
> > @@ -55,6 +61,12 @@ int32_t btf_elf__add_enum(struct btf_elf *btf, uint32_t name, uint32_t size,
> >  int btf_elf__add_enum_val(struct btf_elf *btf, uint32_t name, int32_t value);
> >  int32_t btf_elf__add_func_proto(struct btf_elf *btf, struct ftype *ftype,
> >  				uint32_t type_id_off);
> > +int32_t btf_elf__add_var_type(struct btf_elf *btfe, uint32_t type, uint32_t name_off,
> > +			      uint32_t linkage);
> > +int32_t btf_elf__add_var_secinfo(struct gobuffer *buf, uint32_t type,
> > +				 uint32_t offset, uint32_t size);
> > +int32_t btf_elf__add_datasec_type(struct btf_elf *btfe, const char *section_name,
> > +				  struct gobuffer *var_secinfo_buf);
> >  void btf_elf__set_strings(struct btf_elf *btf, struct gobuffer *strings);
> >  int  btf_elf__encode(struct btf_elf *btf, uint8_t flags);
> >  
> > diff --git a/pahole.c b/pahole.c
> > index e2a081b..8407db9 100644
> > --- a/pahole.c
> > +++ b/pahole.c
> > @@ -1084,6 +1084,7 @@ static error_t pahole__options_parser(int key, char *arg,
> >  	case 'i': find_containers = 1;
> >  		  class_name = arg;			break;
> >  	case 'J': btf_encode = 1;
> > +		  conf_load.get_addr_info = true;
> >  		  no_bitfield_type_recode = true;	break;
> >  	case 'l': conf.show_first_biggest_size_base_type_member = 1;	break;
> >  	case 'M': conf.show_only_data_members = 1;	break;
> > -- 
> > 2.27.0.278.ge193c7cf3a9-goog
> > 
> 
> -- 
> 
> - Arnaldo

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
       [not found]               ` <CA+khW7iEMXgtauLikO3YwUZus7hsdQti_KjZXk7uoCdPUBc=qw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2020-06-12 22:01                 ` Andrii Nakryiko
       [not found]                   ` <CA+khW7h1O+7XFuS-T-=3MUjr6qhbEE+tUyLbbHoSn6fWzN+xTg@mail.gmail.com>
  0 siblings, 1 reply; 6+ messages in thread
From: Andrii Nakryiko @ 2020-06-12 22:01 UTC (permalink / raw)
  To: Hao Luo
  Cc: Arnaldo Carvalho de Melo, Alexei Starovoitov, Daniel Borkmann,
	Oleg Rombakh, Martin KaFai Lau, dwarves-u79uwXL29TY76Z2rM5mHXA

On Fri, Jun 12, 2020 at 2:54 PM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
> Further updates:
>
> Previously the in-kernel bpf verifier rejected the enhanced vmlinux because I set the "size" field of datasec to 0, which is obviously forbidden by the bpf kernel verifier. After I adjusted it to the last var_secinfo's offset + size, it got loaded successfully. In addition, there are a few more sanity checks in the verifier on DATASEC and VAR's meta format (e.g. type size, variable name, etc.), which I am going to port into btf_encoder to be 100% safe. With these checks, the "(anon)" vars seen by Arnaldo should be gone. I am currently running through a set of tp_btf, fentry and fexit programs on the enhanced vmlinux and they are looking good so far. I hope to upload these changes in the next iteration next week.

Do you know where those (anon) vars are coming from?

>
> Thanks,
> Hao
>
> On Thu, Jun 11, 2020 at 2:41 PM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>>
>> I am finally able to get a tp_btf program compiled and tested against the generated vmlinux. Unfortunately, the bpf verifier seemed to have rejected the vmlinux. I got an error message "in-kernel BTF is malformed". I have to work on the bpf verifier first to make it compatible with the newly added VARs.
>>
>> Hao
>>
>> On Thu, Jun 11, 2020 at 10:59 AM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>>>
>>> Hi Arnaldo,
>>>
>>> Sorry for the late reply, I was tied to other stuff on my other work in the last couple of days. I am going to take a closer look today and tomorrow. It seems I had difficulty reproducing in my local environment, maybe due to differences in compiler flags or kconfigs. Could you help me by enabling verbose to see which CU generated those symbols? In the patch I have added debug messages reporting the current CU and symbols that got encoded.
>>>
>>> Thanks,
>>> Hao
>>>
>>>
>>> On Tue, Jun 9, 2020 at 7:58 AM Arnaldo Carvalho de Melo <acme-DgEjT+Ai2yhQFI55V6+gNQ@public.gmane.orgg> wrote:
>>>>
>>>> Em Tue, Jun 09, 2020 at 11:29:40AM -0300, Arnaldo Carvalho de Melo escreveu:
>>>> > Em Mon, Jun 08, 2020 at 10:34:03AM -0700, Hao Luo escreveu:
>>>> > > On SMP systems, the global percpu variables are placed in a special
>>>> > > '.data..percpu' section, which is stored in a segment whose initial
>>>> > > address is set to 0, the addresses of per-CPU variables are relative
>>>> > > positive addresses [1].
>>>> > >
>>>> > > This patch extracts these variables from vmlinux and places them with
>>>> > > their type information in BTF. More specifically, when BTF is encoded,
>>>> > > we find the index of the '.data..percpu' section and then traverse
>>>> > > the symbol table to find those global objects which are in this section.
>>>> > > For each of these objects, we push a BTF_KIND_VAR into the types buffer,
>>>> > > and a BTF_VAR_SECINFO into another buffer, percpu_secinfo. When all the
>>>> > > CUs have finished processing, we push a BTF_KIND_DATASEC into the
>>>> > > btfe->types buffer, followed by the percpu_secinfo's content.
>>>> > >
>>>> > > In a v5.7-rc7 linux kernel, I was able to extract 291 such variables.
>>>> > > The build time overhead is small and the space overhead is also small.
>>>> >
>>>> > Looks good, I'm doing some testing on it now, Andrii, can you provide an
>>>> > Acked-by or Reviewed-by?
>>>>
>>>> So, I see these (anon) variables, what are these? for an 5.7 vmlinux:
>>>>
>>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w VAR | tail
>>>> [67381] VAR '(anon)' type_id=67175, linkage=static
>>>> [67495] VAR 'rt_cache_stat' type_id=67417, linkage=static
>>>> [67496] VAR 'rt_uncached_list' type_id=67416, linkage=static
>>>> [67857] VAR 'tcp_md5sig_pool' type_id=67788, linkage=static
>>>> [67993] VAR 'tsq_tasklet' type_id=67927, linkage=static
>>>> [69524] VAR 'xfrm_trans_tasklet' type_id=69502, linkage=static
>>>> [70055] VAR 'rt6_uncached_list' type_id=67416, linkage=static
>>>> [70609] VAR '(anon)' type_id=1713, linkage=static
>>>> [70634] VAR 'hmac_ring' type_id=1909, linkage=static
>>>> [71591] VAR 'xskmap_flush_list' type_id=85, linkage=static
>>>> [acme@five pahole]$
>>>>
>>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w 1713
>>>> [1713] FUNC 'memset' type_id=1712
>>>> [7235] VAR '(anon)' type_id=1713, linkage=static
>>>> [7236] VAR '(anon)' type_id=1713, linkage=static
>>>> [8832] VAR '(anon)' type_id=1713, linkage=static
>>>> [14346] VAR '(anon)' type_id=1713, linkage=static
>>>> [14347] VAR '(anon)' type_id=1713, linkage=static
>>>> [14348] VAR '(anon)' type_id=1713, linkage=static
>>>> [14349] VAR '(anon)' type_id=1713, linkage=static
>>>> [14350] VAR '(anon)' type_id=1713, linkage=static
>>>> [14351] VAR '(anon)' type_id=1713, linkage=static
>>>> [14352] VAR '(anon)' type_id=1713, linkage=static
>>>> [18903] VAR '(anon)' type_id=1713, linkage=static
>>>> [18904] VAR '(anon)' type_id=1713, linkage=static
>>>> [23180] VAR '(anon)' type_id=1713, linkage=static
>>>> [44605] VAR '(anon)' type_id=1713, linkage=static
>>>> [60638] VAR '(anon)' type_id=1713, linkage=static
>>>> [60639] VAR '(anon)' type_id=1713, linkage=static
>>>> [63869] VAR '(anon)' type_id=1713, linkage=static
>>>> [70609] VAR '(anon)' type_id=1713, linkage=static
>>>> [acme@five pahole]$
>>>>
>>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m1 -w 1713 -B9
>>>> [1710] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
>>>>         'p' type_id=96
>>>>         'q' type_id=97
>>>>         'size' type_id=49
>>>> [1711] FUNC 'memcpy' type_id=1710
>>>> [1712] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
>>>>         'p' type_id=96
>>>>         'c' type_id=22
>>>>         'size' type_id=49
>>>> [1713] FUNC 'memset' type_id=1712
>>>> [acme@five pahole]$
>>>>
>>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m10 -w 7235 -B5 -A5
>>>>         'prec' type_id=22
>>>> [7232] FUNC 'arch_show_interrupts' type_id=7231
>>>> [7233] FUNC_PROTO '(anon)' ret_type_id=0 vlen=1
>>>>         'irq' type_id=10
>>>> [7234] FUNC 'ack_bad_irq' type_id=7233
>>>> [7235] VAR '(anon)' type_id=1713, linkage=static
>>>> [7236] VAR '(anon)' type_id=1713, linkage=static
>>>> [7237] ARRAY '(anon)' type_id=233 index_type_id=1 nr_elems=4
>>>> [7238] FUNC 'irq_init_percpu_irqstack' type_id=6732
>>>> [7239] VAR 'irq_stack_backing_store' type_id=412, linkage=global-alloc
>>>> [7240] STRUCT 'estack_pages' size=8 vlen=3
>>>> --
>>>>         type_id=6739 offset=98096 size=16
>>>>         type_id=6737 offset=98112 size=16
>>>>         type_id=6736 offset=98128 size=16
>>>>         type_id=6764 offset=98144 size=16
>>>>         type_id=6763 offset=98160 size=16
>>>>         type_id=7235 offset=98176 size=0
>>>>         type_id=7330 offset=98192 size=4
>>>>         type_id=7329 offset=98200 size=8
>>>>         type_id=7328 offset=98208 size=4
>>>>         type_id=7325 offset=98216 size=8
>>>>         type_id=7327 offset=98224 size=1
>>>> [acme@five pahole]$
>>>>
>>>> [acme@five pahole]$ readelf  -wi vmlinux  | grep -m 2 DW_AT_producer
>>>>     <1c>   DW_AT_producer    : (indirect string, offset: 0x49): GNU AS 2.32
>>>>     <2e>   DW_AT_producer    : (indirect string, offset: 0x1358): GNU C89 9.3.1 20200408 (Red Hat 9.3.1-2) -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount -mfentry -march=x86-64 -g -O2 -std=gnu90 -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1 -fno-asynchronous-unwind-tables -fno-jump-tables -fno-delete-null-pointer-checks -fstack-protector-strong -fno-var-tracking-assignments -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fstack-check=no -fconserve-stack -fcf-protection=none --param allow-store-data-races=0
>>>> [acme@five pahole]$
>>>>
>>>> > Thanks,
>>>> >
>>>> > - Arnaldo
>>>> >
>>>> > > Testing:
>>>> > >
>>>> > > Before:
>>>> > >  $ readelf -SW vmlinux | grep BTF
>>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d2bf8 00   A  0   0  1
>>>> > >
>>>> > > After:
>>>> > >  $ pahole -J vmlinux
>>>> > >  $ readelf -SW vmlinux  | grep BTF
>>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c 2d5bca 00   A  0   0  1
>>>> > >
>>>> > > Common percpu vars can be found in the BTF section.
>>>> > >
>>>> > >  $ bpftool btf dump file vmlinux | grep runqueues
>>>> > >  [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc
>>>> > >
>>>> > >  $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
>>>> > >  [17592] STRUCT 'cpu_stopper' size=72 vlen=5
>>>> > >  [17612] VAR 'cpu_stopper' type_id=17592, linkage=static
>>>> > >
>>>> > >  $ bpftool btf dump file vmlinux | grep ' DATASEC '
>>>> > >  [63652] DATASEC '.data..percpu' size=0 vlen=294
>>>> > >
>>>> > > References:
>>>> > >  [1] https://lwn.net/Articles/531148/
>>>> > > Signed-off-by: Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>>>> > > ---
>>>> > > Changelog since v2:
>>>> > > - Move finding percpu_shndx and extracting symtab into btfe creation,
>>>> > >   so we don't have to allocate a new symtab for each CU.
>>>> > > - More debug msg by logging the vars encoded in 'verbose' mode. We
>>>> > >   probably don't want to log the symbols that are _not_ encoded,
>>>> > >   since that would be too verbose.
>>>> > > - Calculate var offsets using 'addr - shdr.sh_addr', so it could be
>>>> > >   generalized to other sections in future.
>>>> > > - Filter out the symbols that are not STT_OBJECT.
>>>> > > - Sort var_secinfos in the DATASEC by their offsets.
>>>> > > - Free 'persec_secinfo' buffer and 'symtab' in btfe deletion.
>>>> > > - Replace the string ".data..percpu" with a constant PERCPU_SECTION.
>>>> > >
>>>> > > Changelog since v1:
>>>> > > - Add a ".data..percpu" DATASEC that encodes the found VARs.
>>>> > > - Use percpu section's shndx to find the symbols that are percpu variables.
>>>> > > - Use the correct type to set VAR's linkage.
>>>> > >
>>>> > >  btf_encoder.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>> > >  dwarves.c     |   6 +++
>>>> > >  dwarves.h     |   2 +
>>>> > >  libbtf.c      | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>> > >  libbtf.h      |  12 +++++
>>>> > >  pahole.c      |   1 +
>>>> > >  6 files changed, 263 insertions(+)
>>>> > >

[...]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
       [not found]                     ` <CA+khW7h1O+7XFuS-T-=3MUjr6qhbEE+tUyLbbHoSn6fWzN+xTg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2020-06-13  1:30                       ` Arnaldo Carvalho de Melo
  2020-06-13 22:12                       ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-06-13  1:30 UTC (permalink / raw)
  To: Hao Luo, Andrii Nakryiko
  Cc: Arnaldo Carvalho de Melo, Alexei Starovoitov, Daniel Borkmann,
	Oleg Rombakh, Martin KaFai Lau, dwarves-u79uwXL29TY76Z2rM5mHXA



On June 12, 2020 7:16:46 PM GMT-03:00, Hao Luo <haoluo@google.com> wrote:
>On Fri, Jun 12, 2020 at 3:01 PM Andrii Nakryiko
><andrii.nakryiko@gmail.com>
>wrote:
>
>> On Fri, Jun 12, 2020 at 2:54 PM Hao Luo <haoluo@google.com> wrote:
>> >
>> > Further updates:
>> >
>> > Previously the in-kernel bpf verifier rejected the enhanced vmlinux
>> because I set the "size" field of datasec to 0, which is obviously
>> forbidden by the bpf kernel verifier. After I adjusted it to the last
>> var_secinfo's offset + size, it got loaded successfully. In addition,
>there
>> are a few more sanity checks in the verifier on DATASEC and VAR's
>meta
>> format (e.g. type size, variable name, etc.), which I am going to
>port into
>> btf_encoder to be 100% safe. With these checks, the "(anon)" vars
>seen by
>> Arnaldo should be gone. I am currently running through a set of
>tp_btf,
>> fentry and fexit programs on the enhanced vmlinux and they are
>looking good
>> so far. I hope to upload these changes in the next iteration next
>week.
>>
>> Do you know where those (anon) vars are coming from?
>>
>
>Nah, I am curious too but can't reproduce on my side. It would be
>helpful
>if Arnaldo could enable the debug msg I put in the patch and let me
>know
>which cu generates those (anon) vars.

Unsure if I'll be able to do it tomorrow, I'll try.

>
>
>>
>> >
>> > Thanks,
>> > Hao
>> >
>> > On Thu, Jun 11, 2020 at 2:41 PM Hao Luo <haoluo@google.com> wrote:
>> >>
>> >> I am finally able to get a tp_btf program compiled and tested
>against
>> the generated vmlinux. Unfortunately, the bpf verifier seemed to have
>> rejected the vmlinux. I got an error message "in-kernel BTF is
>malformed".
>> I have to work on the bpf verifier first to make it compatible with
>the
>> newly added VARs.
>> >>
>> >> Hao
>> >>
>> >> On Thu, Jun 11, 2020 at 10:59 AM Hao Luo <haoluo@google.com>
>wrote:
>> >>>
>> >>> Hi Arnaldo,
>> >>>
>> >>> Sorry for the late reply, I was tied to other stuff on my other
>work
>> in the last couple of days. I am going to take a closer look today
>and
>> tomorrow. It seems I had difficulty reproducing in my local
>environment,
>> maybe due to differences in compiler flags or kconfigs. Could you
>help me
>> by enabling verbose to see which CU generated those symbols? In the
>patch I
>> have added debug messages reporting the current CU and symbols that
>got
>> encoded.
>> >>>
>> >>> Thanks,
>> >>> Hao
>> >>>
>> >>>
>> >>> On Tue, Jun 9, 2020 at 7:58 AM Arnaldo Carvalho de Melo <
>> acme@kernel.org> wrote:
>> >>>>
>> >>>> Em Tue, Jun 09, 2020 at 11:29:40AM -0300, Arnaldo Carvalho de
>Melo
>> escreveu:
>> >>>> > Em Mon, Jun 08, 2020 at 10:34:03AM -0700, Hao Luo escreveu:
>> >>>> > > On SMP systems, the global percpu variables are placed in a
>> special
>> >>>> > > '.data..percpu' section, which is stored in a segment whose
>> initial
>> >>>> > > address is set to 0, the addresses of per-CPU variables are
>> relative
>> >>>> > > positive addresses [1].
>> >>>> > >
>> >>>> > > This patch extracts these variables from vmlinux and places
>them
>> with
>> >>>> > > their type information in BTF. More specifically, when BTF
>is
>> encoded,
>> >>>> > > we find the index of the '.data..percpu' section and then
>traverse
>> >>>> > > the symbol table to find those global objects which are in
>this
>> section.
>> >>>> > > For each of these objects, we push a BTF_KIND_VAR into the
>types
>> buffer,
>> >>>> > > and a BTF_VAR_SECINFO into another buffer, percpu_secinfo.
>When
>> all the
>> >>>> > > CUs have finished processing, we push a BTF_KIND_DATASEC
>into the
>> >>>> > > btfe->types buffer, followed by the percpu_secinfo's
>content.
>> >>>> > >
>> >>>> > > In a v5.7-rc7 linux kernel, I was able to extract 291 such
>> variables.
>> >>>> > > The build time overhead is small and the space overhead is
>also
>> small.
>> >>>> >
>> >>>> > Looks good, I'm doing some testing on it now, Andrii, can you
>> provide an
>> >>>> > Acked-by or Reviewed-by?
>> >>>>
>> >>>> So, I see these (anon) variables, what are these? for an 5.7
>vmlinux:
>> >>>>
>> >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w VAR
>|
>> tail
>> >>>> [67381] VAR '(anon)' type_id=67175, linkage=static
>> >>>> [67495] VAR 'rt_cache_stat' type_id=67417, linkage=static
>> >>>> [67496] VAR 'rt_uncached_list' type_id=67416, linkage=static
>> >>>> [67857] VAR 'tcp_md5sig_pool' type_id=67788, linkage=static
>> >>>> [67993] VAR 'tsq_tasklet' type_id=67927, linkage=static
>> >>>> [69524] VAR 'xfrm_trans_tasklet' type_id=69502, linkage=static
>> >>>> [70055] VAR 'rt6_uncached_list' type_id=67416, linkage=static
>> >>>> [70609] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [70634] VAR 'hmac_ring' type_id=1909, linkage=static
>> >>>> [71591] VAR 'xskmap_flush_list' type_id=85, linkage=static
>> >>>> [acme@five pahole]$
>> >>>>
>> >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w 1713
>> >>>> [1713] FUNC 'memset' type_id=1712
>> >>>> [7235] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [7236] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [8832] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14346] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14347] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14348] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14349] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14350] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14351] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [14352] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [18903] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [18904] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [23180] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [44605] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [60638] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [60639] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [63869] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [70609] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [acme@five pahole]$
>> >>>>
>> >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m1 -w
>1713
>> -B9
>> >>>> [1710] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
>> >>>>         'p' type_id=96
>> >>>>         'q' type_id=97
>> >>>>         'size' type_id=49
>> >>>> [1711] FUNC 'memcpy' type_id=1710
>> >>>> [1712] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
>> >>>>         'p' type_id=96
>> >>>>         'c' type_id=22
>> >>>>         'size' type_id=49
>> >>>> [1713] FUNC 'memset' type_id=1712
>> >>>> [acme@five pahole]$
>> >>>>
>> >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m10 -w
>> 7235 -B5 -A5
>> >>>>         'prec' type_id=22
>> >>>> [7232] FUNC 'arch_show_interrupts' type_id=7231
>> >>>> [7233] FUNC_PROTO '(anon)' ret_type_id=0 vlen=1
>> >>>>         'irq' type_id=10
>> >>>> [7234] FUNC 'ack_bad_irq' type_id=7233
>> >>>> [7235] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [7236] VAR '(anon)' type_id=1713, linkage=static
>> >>>> [7237] ARRAY '(anon)' type_id=233 index_type_id=1 nr_elems=4
>> >>>> [7238] FUNC 'irq_init_percpu_irqstack' type_id=6732
>> >>>> [7239] VAR 'irq_stack_backing_store' type_id=412,
>linkage=global-alloc
>> >>>> [7240] STRUCT 'estack_pages' size=8 vlen=3
>> >>>> --
>> >>>>         type_id=6739 offset=98096 size=16
>> >>>>         type_id=6737 offset=98112 size=16
>> >>>>         type_id=6736 offset=98128 size=16
>> >>>>         type_id=6764 offset=98144 size=16
>> >>>>         type_id=6763 offset=98160 size=16
>> >>>>         type_id=7235 offset=98176 size=0
>> >>>>         type_id=7330 offset=98192 size=4
>> >>>>         type_id=7329 offset=98200 size=8
>> >>>>         type_id=7328 offset=98208 size=4
>> >>>>         type_id=7325 offset=98216 size=8
>> >>>>         type_id=7327 offset=98224 size=1
>> >>>> [acme@five pahole]$
>> >>>>
>> >>>> [acme@five pahole]$ readelf  -wi vmlinux  | grep -m 2
>DW_AT_producer
>> >>>>     <1c>   DW_AT_producer    : (indirect string, offset: 0x49):
>GNU
>> AS 2.32
>> >>>>     <2e>   DW_AT_producer    : (indirect string, offset:
>0x1358): GNU
>> C89 9.3.1 20200408 (Red Hat 9.3.1-2) -mno-sse -mno-mmx -mno-sse2
>-mno-3dnow
>> -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387
>-mpreferred-stack-boundary=3
>> -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel
>> -mindirect-branch=thunk-extern -mindirect-branch-register
>-mrecord-mcount
>> -mfentry -march=x86-64 -g -O2 -std=gnu90 -fno-strict-aliasing
>-fno-common
>> -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1
>> -fno-asynchronous-unwind-tables -fno-jump-tables
>> -fno-delete-null-pointer-checks -fstack-protector-strong
>> -fno-var-tracking-assignments -fno-strict-overflow
>-fno-merge-all-constants
>> -fmerge-constants -fstack-check=no -fconserve-stack
>-fcf-protection=none
>> --param allow-store-data-races=0
>> >>>> [acme@five pahole]$
>> >>>>
>> >>>> > Thanks,
>> >>>> >
>> >>>> > - Arnaldo
>> >>>> >
>> >>>> > > Testing:
>> >>>> > >
>> >>>> > > Before:
>> >>>> > >  $ readelf -SW vmlinux | grep BTF
>> >>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c
>13a905c
>> 2d2bf8 00   A  0   0  1
>> >>>> > >
>> >>>> > > After:
>> >>>> > >  $ pahole -J vmlinux
>> >>>> > >  $ readelf -SW vmlinux  | grep BTF
>> >>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c
>13a905c
>> 2d5bca 00   A  0   0  1
>> >>>> > >
>> >>>> > > Common percpu vars can be found in the BTF section.
>> >>>> > >
>> >>>> > >  $ bpftool btf dump file vmlinux | grep runqueues
>> >>>> > >  [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc
>> >>>> > >
>> >>>> > >  $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
>> >>>> > >  [17592] STRUCT 'cpu_stopper' size=72 vlen=5
>> >>>> > >  [17612] VAR 'cpu_stopper' type_id=17592, linkage=static
>> >>>> > >
>> >>>> > >  $ bpftool btf dump file vmlinux | grep ' DATASEC '
>> >>>> > >  [63652] DATASEC '.data..percpu' size=0 vlen=294
>> >>>> > >
>> >>>> > > References:
>> >>>> > >  [1] https://lwn.net/Articles/531148/
>> >>>> > > Signed-off-by: Hao Luo <haoluo@google.com>
>> >>>> > > ---
>> >>>> > > Changelog since v2:
>> >>>> > > - Move finding percpu_shndx and extracting symtab into btfe
>> creation,
>> >>>> > >   so we don't have to allocate a new symtab for each CU.
>> >>>> > > - More debug msg by logging the vars encoded in 'verbose'
>mode. We
>> >>>> > >   probably don't want to log the symbols that are _not_
>encoded,
>> >>>> > >   since that would be too verbose.
>> >>>> > > - Calculate var offsets using 'addr - shdr.sh_addr', so it
>could
>> be
>> >>>> > >   generalized to other sections in future.
>> >>>> > > - Filter out the symbols that are not STT_OBJECT.
>> >>>> > > - Sort var_secinfos in the DATASEC by their offsets.
>> >>>> > > - Free 'persec_secinfo' buffer and 'symtab' in btfe
>deletion.
>> >>>> > > - Replace the string ".data..percpu" with a constant
>> PERCPU_SECTION.
>> >>>> > >
>> >>>> > > Changelog since v1:
>> >>>> > > - Add a ".data..percpu" DATASEC that encodes the found VARs.
>> >>>> > > - Use percpu section's shndx to find the symbols that are
>percpu
>> variables.
>> >>>> > > - Use the correct type to set VAR's linkage.
>> >>>> > >
>> >>>> > >  btf_encoder.c | 119
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>> > >  dwarves.c     |   6 +++
>> >>>> > >  dwarves.h     |   2 +
>> >>>> > >  libbtf.c      | 123
>> ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>> > >  libbtf.h      |  12 +++++
>> >>>> > >  pahole.c      |   1 +
>> >>>> > >  6 files changed, 263 insertions(+)
>> >>>> > >
>>
>> [...]
>>

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF.
       [not found]                     ` <CA+khW7h1O+7XFuS-T-=3MUjr6qhbEE+tUyLbbHoSn6fWzN+xTg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2020-06-13  1:30                       ` Arnaldo Carvalho de Melo
@ 2020-06-13 22:12                       ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-06-13 22:12 UTC (permalink / raw)
  To: Hao Luo
  Cc: Andrii Nakryiko, Alexei Starovoitov, Daniel Borkmann,
	Oleg Rombakh, Martin KaFai Lau, dwarves-u79uwXL29TY76Z2rM5mHXA

Em Fri, Jun 12, 2020 at 03:16:46PM -0700, Hao Luo escreveu:
> On Fri, Jun 12, 2020 at 3:01 PM Andrii Nakryiko <andrii.nakryiko-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> wrote:
> 
> > On Fri, Jun 12, 2020 at 2:54 PM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> > >
> > > Further updates:
> > >
> > > Previously the in-kernel bpf verifier rejected the enhanced vmlinux
> > because I set the "size" field of datasec to 0, which is obviously
> > forbidden by the bpf kernel verifier. After I adjusted it to the last
> > var_secinfo's offset + size, it got loaded successfully. In addition, there
> > are a few more sanity checks in the verifier on DATASEC and VAR's meta
> > format (e.g. type size, variable name, etc.), which I am going to port into
> > btf_encoder to be 100% safe. With these checks, the "(anon)" vars seen by
> > Arnaldo should be gone. I am currently running through a set of tp_btf,
> > fentry and fexit programs on the enhanced vmlinux and they are looking good
> > so far. I hope to upload these changes in the next iteration next week.
> >
> > Do you know where those (anon) vars are coming from?
> >
> 
> Nah, I am curious too but can't reproduce on my side. It would be helpful
> if Arnaldo could enable the debug msg I put in the patch and let me know
> which cu generates those (anon) vars.

I'll try and send it, maybe tomorrow (Sunday).
 
> 
> >
> > >
> > > Thanks,
> > > Hao
> > >
> > > On Thu, Jun 11, 2020 at 2:41 PM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> > >>
> > >> I am finally able to get a tp_btf program compiled and tested against
> > the generated vmlinux. Unfortunately, the bpf verifier seemed to have
> > rejected the vmlinux. I got an error message "in-kernel BTF is malformed".
> > I have to work on the bpf verifier first to make it compatible with the
> > newly added VARs.
> > >>
> > >> Hao
> > >>
> > >> On Thu, Jun 11, 2020 at 10:59 AM Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> > >>>
> > >>> Hi Arnaldo,
> > >>>
> > >>> Sorry for the late reply, I was tied to other stuff on my other work
> > in the last couple of days. I am going to take a closer look today and
> > tomorrow. It seems I had difficulty reproducing in my local environment,
> > maybe due to differences in compiler flags or kconfigs. Could you help me
> > by enabling verbose to see which CU generated those symbols? In the patch I
> > have added debug messages reporting the current CU and symbols that got
> > encoded.
> > >>>
> > >>> Thanks,
> > >>> Hao
> > >>>
> > >>>
> > >>> On Tue, Jun 9, 2020 at 7:58 AM Arnaldo Carvalho de Melo <
> > acme-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > >>>>
> > >>>> Em Tue, Jun 09, 2020 at 11:29:40AM -0300, Arnaldo Carvalho de Melo
> > escreveu:
> > >>>> > Em Mon, Jun 08, 2020 at 10:34:03AM -0700, Hao Luo escreveu:
> > >>>> > > On SMP systems, the global percpu variables are placed in a
> > special
> > >>>> > > '.data..percpu' section, which is stored in a segment whose
> > initial
> > >>>> > > address is set to 0, the addresses of per-CPU variables are
> > relative
> > >>>> > > positive addresses [1].
> > >>>> > >
> > >>>> > > This patch extracts these variables from vmlinux and places them
> > with
> > >>>> > > their type information in BTF. More specifically, when BTF is
> > encoded,
> > >>>> > > we find the index of the '.data..percpu' section and then traverse
> > >>>> > > the symbol table to find those global objects which are in this
> > section.
> > >>>> > > For each of these objects, we push a BTF_KIND_VAR into the types
> > buffer,
> > >>>> > > and a BTF_VAR_SECINFO into another buffer, percpu_secinfo. When
> > all the
> > >>>> > > CUs have finished processing, we push a BTF_KIND_DATASEC into the
> > >>>> > > btfe->types buffer, followed by the percpu_secinfo's content.
> > >>>> > >
> > >>>> > > In a v5.7-rc7 linux kernel, I was able to extract 291 such
> > variables.
> > >>>> > > The build time overhead is small and the space overhead is also
> > small.
> > >>>> >
> > >>>> > Looks good, I'm doing some testing on it now, Andrii, can you
> > provide an
> > >>>> > Acked-by or Reviewed-by?
> > >>>>
> > >>>> So, I see these (anon) variables, what are these? for an 5.7 vmlinux:
> > >>>>
> > >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w VAR |
> > tail
> > >>>> [67381] VAR '(anon)' type_id=67175, linkage=static
> > >>>> [67495] VAR 'rt_cache_stat' type_id=67417, linkage=static
> > >>>> [67496] VAR 'rt_uncached_list' type_id=67416, linkage=static
> > >>>> [67857] VAR 'tcp_md5sig_pool' type_id=67788, linkage=static
> > >>>> [67993] VAR 'tsq_tasklet' type_id=67927, linkage=static
> > >>>> [69524] VAR 'xfrm_trans_tasklet' type_id=69502, linkage=static
> > >>>> [70055] VAR 'rt6_uncached_list' type_id=67416, linkage=static
> > >>>> [70609] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [70634] VAR 'hmac_ring' type_id=1909, linkage=static
> > >>>> [71591] VAR 'xskmap_flush_list' type_id=85, linkage=static
> > >>>> [acme@five pahole]$
> > >>>>
> > >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -w 1713
> > >>>> [1713] FUNC 'memset' type_id=1712
> > >>>> [7235] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [7236] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [8832] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14346] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14347] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14348] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14349] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14350] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14351] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [14352] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [18903] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [18904] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [23180] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [44605] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [60638] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [60639] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [63869] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [70609] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [acme@five pahole]$
> > >>>>
> > >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m1 -w 1713
> > -B9
> > >>>> [1710] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
> > >>>>         'p' type_id=96
> > >>>>         'q' type_id=97
> > >>>>         'size' type_id=49
> > >>>> [1711] FUNC 'memcpy' type_id=1710
> > >>>> [1712] FUNC_PROTO '(anon)' ret_type_id=96 vlen=3
> > >>>>         'p' type_id=96
> > >>>>         'c' type_id=22
> > >>>>         'size' type_id=49
> > >>>> [1713] FUNC 'memset' type_id=1712
> > >>>> [acme@five pahole]$
> > >>>>
> > >>>> [acme@five pahole]$ bpftool btf dump file vmlinux | grep -m10 -w
> > 7235 -B5 -A5
> > >>>>         'prec' type_id=22
> > >>>> [7232] FUNC 'arch_show_interrupts' type_id=7231
> > >>>> [7233] FUNC_PROTO '(anon)' ret_type_id=0 vlen=1
> > >>>>         'irq' type_id=10
> > >>>> [7234] FUNC 'ack_bad_irq' type_id=7233
> > >>>> [7235] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [7236] VAR '(anon)' type_id=1713, linkage=static
> > >>>> [7237] ARRAY '(anon)' type_id=233 index_type_id=1 nr_elems=4
> > >>>> [7238] FUNC 'irq_init_percpu_irqstack' type_id=6732
> > >>>> [7239] VAR 'irq_stack_backing_store' type_id=412, linkage=global-alloc
> > >>>> [7240] STRUCT 'estack_pages' size=8 vlen=3
> > >>>> --
> > >>>>         type_id=6739 offset=98096 size=16
> > >>>>         type_id=6737 offset=98112 size=16
> > >>>>         type_id=6736 offset=98128 size=16
> > >>>>         type_id=6764 offset=98144 size=16
> > >>>>         type_id=6763 offset=98160 size=16
> > >>>>         type_id=7235 offset=98176 size=0
> > >>>>         type_id=7330 offset=98192 size=4
> > >>>>         type_id=7329 offset=98200 size=8
> > >>>>         type_id=7328 offset=98208 size=4
> > >>>>         type_id=7325 offset=98216 size=8
> > >>>>         type_id=7327 offset=98224 size=1
> > >>>> [acme@five pahole]$
> > >>>>
> > >>>> [acme@five pahole]$ readelf  -wi vmlinux  | grep -m 2 DW_AT_producer
> > >>>>     <1c>   DW_AT_producer    : (indirect string, offset: 0x49): GNU
> > AS 2.32
> > >>>>     <2e>   DW_AT_producer    : (indirect string, offset: 0x1358): GNU
> > C89 9.3.1 20200408 (Red Hat 9.3.1-2) -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
> > -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3
> > -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel
> > -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount
> > -mfentry -march=x86-64 -g -O2 -std=gnu90 -fno-strict-aliasing -fno-common
> > -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1
> > -fno-asynchronous-unwind-tables -fno-jump-tables
> > -fno-delete-null-pointer-checks -fstack-protector-strong
> > -fno-var-tracking-assignments -fno-strict-overflow -fno-merge-all-constants
> > -fmerge-constants -fstack-check=no -fconserve-stack -fcf-protection=none
> > --param allow-store-data-races=0
> > >>>> [acme@five pahole]$
> > >>>>
> > >>>> > Thanks,
> > >>>> >
> > >>>> > - Arnaldo
> > >>>> >
> > >>>> > > Testing:
> > >>>> > >
> > >>>> > > Before:
> > >>>> > >  $ readelf -SW vmlinux | grep BTF
> > >>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c
> > 2d2bf8 00   A  0   0  1
> > >>>> > >
> > >>>> > > After:
> > >>>> > >  $ pahole -J vmlinux
> > >>>> > >  $ readelf -SW vmlinux  | grep BTF
> > >>>> > >  [25] .BTF              PROGBITS        ffffffff821a905c 13a905c
> > 2d5bca 00   A  0   0  1
> > >>>> > >
> > >>>> > > Common percpu vars can be found in the BTF section.
> > >>>> > >
> > >>>> > >  $ bpftool btf dump file vmlinux | grep runqueues
> > >>>> > >  [14098] VAR 'runqueues' type_id=13725, linkage=global-alloc
> > >>>> > >
> > >>>> > >  $ bpftool btf dump file vmlinux | grep 'cpu_stopper'
> > >>>> > >  [17592] STRUCT 'cpu_stopper' size=72 vlen=5
> > >>>> > >  [17612] VAR 'cpu_stopper' type_id=17592, linkage=static
> > >>>> > >
> > >>>> > >  $ bpftool btf dump file vmlinux | grep ' DATASEC '
> > >>>> > >  [63652] DATASEC '.data..percpu' size=0 vlen=294
> > >>>> > >
> > >>>> > > References:
> > >>>> > >  [1] https://lwn.net/Articles/531148/
> > >>>> > > Signed-off-by: Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> > >>>> > > ---
> > >>>> > > Changelog since v2:
> > >>>> > > - Move finding percpu_shndx and extracting symtab into btfe
> > creation,
> > >>>> > >   so we don't have to allocate a new symtab for each CU.
> > >>>> > > - More debug msg by logging the vars encoded in 'verbose' mode. We
> > >>>> > >   probably don't want to log the symbols that are _not_ encoded,
> > >>>> > >   since that would be too verbose.
> > >>>> > > - Calculate var offsets using 'addr - shdr.sh_addr', so it could
> > be
> > >>>> > >   generalized to other sections in future.
> > >>>> > > - Filter out the symbols that are not STT_OBJECT.
> > >>>> > > - Sort var_secinfos in the DATASEC by their offsets.
> > >>>> > > - Free 'persec_secinfo' buffer and 'symtab' in btfe deletion.
> > >>>> > > - Replace the string ".data..percpu" with a constant
> > PERCPU_SECTION.
> > >>>> > >
> > >>>> > > Changelog since v1:
> > >>>> > > - Add a ".data..percpu" DATASEC that encodes the found VARs.
> > >>>> > > - Use percpu section's shndx to find the symbols that are percpu
> > variables.
> > >>>> > > - Use the correct type to set VAR's linkage.
> > >>>> > >
> > >>>> > >  btf_encoder.c | 119
> > ++++++++++++++++++++++++++++++++++++++++++++++++
> > >>>> > >  dwarves.c     |   6 +++
> > >>>> > >  dwarves.h     |   2 +
> > >>>> > >  libbtf.c      | 123
> > ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >>>> > >  libbtf.h      |  12 +++++
> > >>>> > >  pahole.c      |   1 +
> > >>>> > >  6 files changed, 263 insertions(+)
> > >>>> > >
> >
> > [...]
> >

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-08 17:34 [PATCH v3] btf_encoder: Teach pahole to store percpu variables in vmlinux BTF Hao Luo
     [not found] ` <20200608173403.151706-1-haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
2020-06-09 14:29   ` Arnaldo Carvalho de Melo
     [not found]     ` <20200609142940.GA24868-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2020-06-09 14:58       ` Arnaldo Carvalho de Melo
     [not found]         ` <CA+khW7j_bBNNepxyk4pZQLMS3CxA4CKQ-9cSSub-hTDQW5xZVQ@mail.gmail.com>
     [not found]           ` <CA+khW7iBAFELfYmJDQK5eQ-Q+bCg7Hv3WAYPLz6iPXOO6+TQHw@mail.gmail.com>
     [not found]             ` <CA+khW7iEMXgtauLikO3YwUZus7hsdQti_KjZXk7uoCdPUBc=qw@mail.gmail.com>
     [not found]               ` <CA+khW7iEMXgtauLikO3YwUZus7hsdQti_KjZXk7uoCdPUBc=qw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-06-12 22:01                 ` Andrii Nakryiko
     [not found]                   ` <CA+khW7h1O+7XFuS-T-=3MUjr6qhbEE+tUyLbbHoSn6fWzN+xTg@mail.gmail.com>
     [not found]                     ` <CA+khW7h1O+7XFuS-T-=3MUjr6qhbEE+tUyLbbHoSn6fWzN+xTg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-06-13  1:30                       ` Arnaldo Carvalho de Melo
2020-06-13 22:12                       ` Arnaldo Carvalho de Melo

Dwarves Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dwarves/0 dwarves/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dwarves dwarves/ https://lore.kernel.org/dwarves \
		dwarves@vger.kernel.org
	public-inbox-index dwarves

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.dwarves


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git