All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH pahole] pahole: use 32-bit integers for iterations within CU
@ 2019-03-07  0:23 Andrii Nakryiko
  2019-03-07 14:02 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 11+ messages in thread
From: Andrii Nakryiko @ 2019-03-07  0:23 UTC (permalink / raw)
  To: acme, arnaldo.melo, andrii.nakryiko, ast, yhs, dwarves, bpf
  Cc: Andrii Nakryiko

Existing code base assumes that single CU doesn't have more than 65535 types
per each CU, which might be a reasonable assumption for DWARF data. With BTF,
though, all we get is single, potentially huge, CU which can easily have more
than 65k types. For example, this is the case for allyesconfig version of
Linux kernel, which has >200k types.

Due to this assumption, libdwarves and other parts of pahole are using 16-bit
counters to iterate over entities within CU. This can cause infinite loop when
iterating BTF data, if there are more than 65535 types. This patch changes
non-public variables to use 32-bit integers, where appropriate.

This still leads to invalid reported data when using BTF loader (due to using
(X & 0xFFFF) type ID, instead of X, when X > 65535) and loading huge files,
but at least it's not stuck in an infinite loop anymore.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 btf_loader.c      |  4 ++--
 dwarves.c         | 16 ++++++++--------
 dwarves_fprintf.c |  8 ++++----
 pahole.c          | 36 ++++++++++++++++++------------------
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/btf_loader.c b/btf_loader.c
index 867c608..3de774a 100644
--- a/btf_loader.c
+++ b/btf_loader.c
@@ -45,7 +45,7 @@ static void *tag__alloc(const size_t size)
 	return tag;
 }
 
-static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint16_t tag,
+static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint32_t tag,
 			       uint32_t type, uint16_t vlen, struct btf_param *args, long id)
 {
 	int i;
@@ -100,7 +100,7 @@ static struct base_type *base_type__new(strings_t name, uint32_t attrs,
 	return bt;
 }
 
-static void type__init(struct type *type, uint16_t tag,
+static void type__init(struct type *type, uint32_t tag,
 		       strings_t name, size_t size)
 {
 	INIT_LIST_HEAD(&type->node);
diff --git a/dwarves.c b/dwarves.c
index 111797b..3594ae3 100644
--- a/dwarves.c
+++ b/dwarves.c
@@ -349,7 +349,7 @@ reevaluate:
 
 static void cu__find_class_holes(struct cu *cu)
 {
-	uint16_t id;
+	uint32_t id;
 	struct class *pos;
 
 	cu__for_each_struct(cu, id, pos)
@@ -566,7 +566,7 @@ struct tag *cu__type(const struct cu *cu, const uint16_t id)
 struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
 					   const uint16_t type)
 {
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 
 	if (cu == NULL || type == 0)
@@ -582,7 +582,7 @@ struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
 struct tag *cu__find_base_type_by_name(const struct cu *cu,
 				       const char *name, uint16_t *idp)
 {
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 
 	if (cu == NULL || name == NULL)
@@ -611,7 +611,7 @@ struct tag *cu__find_base_type_by_sname_and_size(const struct cu *cu,
 						 uint16_t bit_size,
 						 uint16_t *idp)
 {
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 
 	if (sname == 0)
@@ -638,7 +638,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
 						   uint16_t bit_size,
 						   uint16_t *idp)
 {
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 
 	if (sname == 0)
@@ -663,7 +663,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
 struct tag *cu__find_struct_by_sname(const struct cu *cu, strings_t sname,
 				     const int include_decls, uint16_t *idp)
 {
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 
 	if (sname == 0)
@@ -699,7 +699,7 @@ static struct tag *__cu__find_struct_by_name(const struct cu *cu, const char *na
 	if (cu == NULL || name == NULL)
 		return NULL;
 
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 	cu__for_each_type(cu, id, pos) {
 		struct type *type;
@@ -1128,7 +1128,7 @@ void lexblock__add_label(struct lexblock *block, struct label *label)
 }
 
 const struct class_member *class__find_bit_hole(const struct class *class,
-					    const struct class_member *trailer,
+						const struct class_member *trailer,
 						const uint16_t bit_hole_size)
 {
 	struct class_member *pos;
diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c
index 490dc30..530c27b 100644
--- a/dwarves_fprintf.c
+++ b/dwarves_fprintf.c
@@ -163,7 +163,7 @@ static const char *tag__accessibility(const struct tag *tag)
 	return NULL;
 }
 
-static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint16_t id,
+static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint32_t id,
 					   const char *fn, int line)
 {
 	return snprintf(bf, len, "<ERROR(%s:%d): %#llx not found!>", fn, line,
@@ -425,7 +425,7 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu,
 		return tag__ptr_name(tag, cu, bf, len, "&");
 	case DW_TAG_ptr_to_member_type: {
 		char suffix[512];
-		uint16_t id = tag__ptr_to_member_type(tag)->containing_type;
+		uint32_t id = tag__ptr_to_member_type(tag)->containing_type;
 
 		type = cu__type(cu, id);
 		if (type != NULL)
@@ -1213,7 +1213,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
 	struct type *type = &class->type;
 	size_t last_size = 0, size;
 	uint8_t newline = 0;
-	uint16_t nr_paddings = 0;
+	uint32_t nr_paddings = 0;
 	uint32_t sum = 0;
 	uint32_t sum_holes = 0;
 	uint32_t sum_paddings = 0;
@@ -1225,7 +1225,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
 	struct tag *tag_pos;
 	const char *current_accessibility = NULL;
 	struct conf_fprintf cconf = conf ? *conf : conf_fprintf__defaults;
-	const uint16_t t = type->namespace.tag.tag;
+	const uint32_t t = type->namespace.tag.tag;
 	size_t printed = fprintf(fp, "%s%s%s%s%s",
 				 cconf.prefix ?: "", cconf.prefix ? " " : "",
 				 ((cconf.classes_as_structs ||
diff --git a/pahole.c b/pahole.c
index 2230e37..bec279c 100644
--- a/pahole.c
+++ b/pahole.c
@@ -41,9 +41,9 @@ static size_t cu__exclude_prefix_len;
 static char *decl_exclude_prefix;
 static size_t decl_exclude_prefix_len;
 
-static uint16_t nr_holes;
-static uint16_t nr_bit_holes;
-static uint16_t hole_size_ge;
+static uint32_t nr_holes;
+static uint32_t nr_bit_holes;
+static uint32_t hole_size_ge;
 static uint8_t show_packable;
 static uint8_t global_verbose;
 static uint8_t recursive;
@@ -156,7 +156,7 @@ static void nr_definitions_formatter(struct structure *st)
 }
 
 static void nr_members_formatter(struct class *class,
-				 struct cu *cu, uint16_t id __unused)
+				 struct cu *cu, uint32_t id __unused)
 {
 	printf("%s%c%u\n", class__name(class, cu), separator,
 	       class__nr_members(class));
@@ -168,26 +168,26 @@ static void nr_methods_formatter(struct structure *st)
 }
 
 static void size_formatter(struct class *class,
-			   struct cu *cu, uint16_t id __unused)
+			   struct cu *cu, uint32_t id __unused)
 {
 	printf("%s%c%d%c%u\n", class__name(class, cu), separator,
 	       class__size(class), separator, class->nr_holes);
 }
 
 static void class_name_len_formatter(struct class *class, struct cu *cu,
-				     uint16_t id __unused)
+				     uint32_t id __unused)
 {
 	const char *name = class__name(class, cu);
 	printf("%s%c%zd\n", name, separator, strlen(name));
 }
 
 static void class_name_formatter(struct class *class,
-				 struct cu *cu, uint16_t id __unused)
+				 struct cu *cu, uint32_t id __unused)
 {
 	puts(class__name(class, cu));
 }
 
-static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
+static void class_formatter(struct class *class, struct cu *cu, uint32_t id)
 {
 	struct tag *typedef_alias = NULL;
 	struct tag *tag = class__tag(class);
@@ -224,7 +224,7 @@ static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
 	putchar('\n');
 }
 
-static void print_packable_info(struct class *c, struct cu *cu, uint16_t id)
+static void print_packable_info(struct class *c, struct cu *cu, uint32_t id)
 {
 	const struct tag *t = class__tag(c);
 	const size_t orig_size = class__size(c);
@@ -267,14 +267,14 @@ static void print_stats(void)
 }
 
 static struct class *class__filter(struct class *class, struct cu *cu,
-				   uint16_t tag_id);
+				   uint32_t tag_id);
 
 static void (*formatter)(struct class *class,
-			 struct cu *cu, uint16_t id) = class_formatter;
+			 struct cu *cu, uint32_t id) = class_formatter;
 
 static void print_classes(struct cu *cu)
 {
-	uint16_t id;
+	uint32_t id;
 	struct class *pos;
 
 	cu__for_each_struct_or_union(cu, id, pos) {
@@ -352,7 +352,7 @@ static int class__packable(struct class *class, struct cu *cu)
 }
 
 static struct class *class__filter(struct class *class, struct cu *cu,
-				   uint16_t tag_id)
+				   uint32_t tag_id)
 {
 	struct tag *tag = class__tag(class);
 	const char *name;
@@ -608,7 +608,7 @@ static void cu_fixup_word_size_iterator(struct cu *cu)
 	original_word_size = cu->addr_size;
 	cu->addr_size = word_size;
 
-	uint16_t id;
+	uint32_t id;
 	struct tag *pos;
 	cu__for_each_type(cu, id, pos)
 		tag__fixup_word_size(pos, cu);
@@ -659,11 +659,11 @@ static void cu__account_nr_methods(struct cu *cu)
 
 static char tab[128];
 
-static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
+static void print_structs_with_pointer_to(const struct cu *cu, uint32_t type)
 {
 	struct class *pos;
 	struct class_member *pos_member;
-	uint16_t id;
+	uint32_t id;
 
 	cu__for_each_struct(cu, id, pos) {
 		bool looked = false;
@@ -702,10 +702,10 @@ static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
 	}
 }
 
-static void print_containers(const struct cu *cu, uint16_t type, int ident)
+static void print_containers(const struct cu *cu, uint32_t type, int ident)
 {
 	struct class *pos;
-	uint16_t id;
+	uint32_t id;
 
 	cu__for_each_struct(cu, id, pos) {
 		if (pos->type.namespace.name == 0)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-07  0:23 [PATCH pahole] pahole: use 32-bit integers for iterations within CU Andrii Nakryiko
@ 2019-03-07 14:02 ` Arnaldo Carvalho de Melo
  2019-03-07 14:09   ` Arnaldo Carvalho de Melo
  2019-03-08  0:26   ` Arnaldo Carvalho de Melo
  0 siblings, 2 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-07 14:02 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: arnaldo.melo, andrii.nakryiko, ast, yhs, dwarves, bpf

Em Wed, Mar 06, 2019 at 04:23:21PM -0800, Andrii Nakryiko escreveu:
> Existing code base assumes that single CU doesn't have more than 65535 types
> per each CU, which might be a reasonable assumption for DWARF data. With BTF,
> though, all we get is single, potentially huge, CU which can easily have more
> than 65k types. For example, this is the case for allyesconfig version of
> Linux kernel, which has >200k types.

And your patch seems to be a step in removing this limitation, thanks
for working on it.
 
> Due to this assumption, libdwarves and other parts of pahole are using 16-bit
> counters to iterate over entities within CU. This can cause infinite loop when
> iterating BTF data, if there are more than 65535 types. This patch changes
> non-public variables to use 32-bit integers, where appropriate.

Ok, there are some bits that are unrelated, I'm comment on it and remove
before applying.
 
> This still leads to invalid reported data when using BTF loader (due to using
> (X & 0xFFFF) type ID, instead of X, when X > 65535) and loading huge files,
> but at least it's not stuck in an infinite loop anymore.

This can be a followup patch, I'm unaware of any usage of libdwarves,
but if this leads to ABI breakage, then we can come up with a new
SONAME.

Mostly the libdwarves has been a internal thing so that the various
tools don't have to statically link with it.

- Arnaldo
 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---
>  btf_loader.c      |  4 ++--
>  dwarves.c         | 16 ++++++++--------
>  dwarves_fprintf.c |  8 ++++----
>  pahole.c          | 36 ++++++++++++++++++------------------
>  4 files changed, 32 insertions(+), 32 deletions(-)
> 
> diff --git a/btf_loader.c b/btf_loader.c
> index 867c608..3de774a 100644
> --- a/btf_loader.c
> +++ b/btf_loader.c
> @@ -45,7 +45,7 @@ static void *tag__alloc(const size_t size)
>  	return tag;
>  }
>  
> -static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint16_t tag,
> +static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint32_t tag,
>  			       uint32_t type, uint16_t vlen, struct btf_param *args, long id)
>  {
>  	int i;
> @@ -100,7 +100,7 @@ static struct base_type *base_type__new(strings_t name, uint32_t attrs,
>  	return bt;
>  }
>  
> -static void type__init(struct type *type, uint16_t tag,
> +static void type__init(struct type *type, uint32_t tag,
>  		       strings_t name, size_t size)
>  {
>  	INIT_LIST_HEAD(&type->node);
> diff --git a/dwarves.c b/dwarves.c
> index 111797b..3594ae3 100644
> --- a/dwarves.c
> +++ b/dwarves.c
> @@ -349,7 +349,7 @@ reevaluate:
>  
>  static void cu__find_class_holes(struct cu *cu)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct class *pos;
>  
>  	cu__for_each_struct(cu, id, pos)
> @@ -566,7 +566,7 @@ struct tag *cu__type(const struct cu *cu, const uint16_t id)
>  struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
>  					   const uint16_t type)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  
>  	if (cu == NULL || type == 0)
> @@ -582,7 +582,7 @@ struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
>  struct tag *cu__find_base_type_by_name(const struct cu *cu,
>  				       const char *name, uint16_t *idp)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  
>  	if (cu == NULL || name == NULL)
> @@ -611,7 +611,7 @@ struct tag *cu__find_base_type_by_sname_and_size(const struct cu *cu,
>  						 uint16_t bit_size,
>  						 uint16_t *idp)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  
>  	if (sname == 0)
> @@ -638,7 +638,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
>  						   uint16_t bit_size,
>  						   uint16_t *idp)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  
>  	if (sname == 0)
> @@ -663,7 +663,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
>  struct tag *cu__find_struct_by_sname(const struct cu *cu, strings_t sname,
>  				     const int include_decls, uint16_t *idp)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  
>  	if (sname == 0)
> @@ -699,7 +699,7 @@ static struct tag *__cu__find_struct_by_name(const struct cu *cu, const char *na
>  	if (cu == NULL || name == NULL)
>  		return NULL;
>  
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  	cu__for_each_type(cu, id, pos) {
>  		struct type *type;
> @@ -1128,7 +1128,7 @@ void lexblock__add_label(struct lexblock *block, struct label *label)
>  }
>  
>  const struct class_member *class__find_bit_hole(const struct class *class,
> -					    const struct class_member *trailer,
> +						const struct class_member *trailer,
>  						const uint16_t bit_hole_size)
>  {
>  	struct class_member *pos;
> diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c
> index 490dc30..530c27b 100644
> --- a/dwarves_fprintf.c
> +++ b/dwarves_fprintf.c
> @@ -163,7 +163,7 @@ static const char *tag__accessibility(const struct tag *tag)
>  	return NULL;
>  }
>  
> -static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint16_t id,
> +static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint32_t id,
>  					   const char *fn, int line)
>  {
>  	return snprintf(bf, len, "<ERROR(%s:%d): %#llx not found!>", fn, line,
> @@ -425,7 +425,7 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu,
>  		return tag__ptr_name(tag, cu, bf, len, "&");
>  	case DW_TAG_ptr_to_member_type: {
>  		char suffix[512];
> -		uint16_t id = tag__ptr_to_member_type(tag)->containing_type;
> +		uint32_t id = tag__ptr_to_member_type(tag)->containing_type;
>  
>  		type = cu__type(cu, id);
>  		if (type != NULL)
> @@ -1213,7 +1213,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>  	struct type *type = &class->type;
>  	size_t last_size = 0, size;
>  	uint8_t newline = 0;
> -	uint16_t nr_paddings = 0;
> +	uint32_t nr_paddings = 0;
>  	uint32_t sum = 0;
>  	uint32_t sum_holes = 0;
>  	uint32_t sum_paddings = 0;
> @@ -1225,7 +1225,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>  	struct tag *tag_pos;
>  	const char *current_accessibility = NULL;
>  	struct conf_fprintf cconf = conf ? *conf : conf_fprintf__defaults;
> -	const uint16_t t = type->namespace.tag.tag;
> +	const uint32_t t = type->namespace.tag.tag;
>  	size_t printed = fprintf(fp, "%s%s%s%s%s",
>  				 cconf.prefix ?: "", cconf.prefix ? " " : "",
>  				 ((cconf.classes_as_structs ||
> diff --git a/pahole.c b/pahole.c
> index 2230e37..bec279c 100644
> --- a/pahole.c
> +++ b/pahole.c
> @@ -41,9 +41,9 @@ static size_t cu__exclude_prefix_len;
>  static char *decl_exclude_prefix;
>  static size_t decl_exclude_prefix_len;
>  
> -static uint16_t nr_holes;
> -static uint16_t nr_bit_holes;
> -static uint16_t hole_size_ge;
> +static uint32_t nr_holes;
> +static uint32_t nr_bit_holes;
> +static uint32_t hole_size_ge;
>  static uint8_t show_packable;
>  static uint8_t global_verbose;
>  static uint8_t recursive;
> @@ -156,7 +156,7 @@ static void nr_definitions_formatter(struct structure *st)
>  }
>  
>  static void nr_members_formatter(struct class *class,
> -				 struct cu *cu, uint16_t id __unused)
> +				 struct cu *cu, uint32_t id __unused)
>  {
>  	printf("%s%c%u\n", class__name(class, cu), separator,
>  	       class__nr_members(class));
> @@ -168,26 +168,26 @@ static void nr_methods_formatter(struct structure *st)
>  }
>  
>  static void size_formatter(struct class *class,
> -			   struct cu *cu, uint16_t id __unused)
> +			   struct cu *cu, uint32_t id __unused)
>  {
>  	printf("%s%c%d%c%u\n", class__name(class, cu), separator,
>  	       class__size(class), separator, class->nr_holes);
>  }
>  
>  static void class_name_len_formatter(struct class *class, struct cu *cu,
> -				     uint16_t id __unused)
> +				     uint32_t id __unused)
>  {
>  	const char *name = class__name(class, cu);
>  	printf("%s%c%zd\n", name, separator, strlen(name));
>  }
>  
>  static void class_name_formatter(struct class *class,
> -				 struct cu *cu, uint16_t id __unused)
> +				 struct cu *cu, uint32_t id __unused)
>  {
>  	puts(class__name(class, cu));
>  }
>  
> -static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
> +static void class_formatter(struct class *class, struct cu *cu, uint32_t id)
>  {
>  	struct tag *typedef_alias = NULL;
>  	struct tag *tag = class__tag(class);
> @@ -224,7 +224,7 @@ static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
>  	putchar('\n');
>  }
>  
> -static void print_packable_info(struct class *c, struct cu *cu, uint16_t id)
> +static void print_packable_info(struct class *c, struct cu *cu, uint32_t id)
>  {
>  	const struct tag *t = class__tag(c);
>  	const size_t orig_size = class__size(c);
> @@ -267,14 +267,14 @@ static void print_stats(void)
>  }
>  
>  static struct class *class__filter(struct class *class, struct cu *cu,
> -				   uint16_t tag_id);
> +				   uint32_t tag_id);
>  
>  static void (*formatter)(struct class *class,
> -			 struct cu *cu, uint16_t id) = class_formatter;
> +			 struct cu *cu, uint32_t id) = class_formatter;
>  
>  static void print_classes(struct cu *cu)
>  {
> -	uint16_t id;
> +	uint32_t id;
>  	struct class *pos;
>  
>  	cu__for_each_struct_or_union(cu, id, pos) {
> @@ -352,7 +352,7 @@ static int class__packable(struct class *class, struct cu *cu)
>  }
>  
>  static struct class *class__filter(struct class *class, struct cu *cu,
> -				   uint16_t tag_id)
> +				   uint32_t tag_id)
>  {
>  	struct tag *tag = class__tag(class);
>  	const char *name;
> @@ -608,7 +608,7 @@ static void cu_fixup_word_size_iterator(struct cu *cu)
>  	original_word_size = cu->addr_size;
>  	cu->addr_size = word_size;
>  
> -	uint16_t id;
> +	uint32_t id;
>  	struct tag *pos;
>  	cu__for_each_type(cu, id, pos)
>  		tag__fixup_word_size(pos, cu);
> @@ -659,11 +659,11 @@ static void cu__account_nr_methods(struct cu *cu)
>  
>  static char tab[128];
>  
> -static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
> +static void print_structs_with_pointer_to(const struct cu *cu, uint32_t type)
>  {
>  	struct class *pos;
>  	struct class_member *pos_member;
> -	uint16_t id;
> +	uint32_t id;
>  
>  	cu__for_each_struct(cu, id, pos) {
>  		bool looked = false;
> @@ -702,10 +702,10 @@ static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
>  	}
>  }
>  
> -static void print_containers(const struct cu *cu, uint16_t type, int ident)
> +static void print_containers(const struct cu *cu, uint32_t type, int ident)
>  {
>  	struct class *pos;
> -	uint16_t id;
> +	uint32_t id;
>  
>  	cu__for_each_struct(cu, id, pos) {
>  		if (pos->type.namespace.name == 0)
> -- 
> 2.17.1

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-07 14:02 ` Arnaldo Carvalho de Melo
@ 2019-03-07 14:09   ` Arnaldo Carvalho de Melo
  2019-03-07 14:11     ` Arnaldo Carvalho de Melo
  2019-03-08 19:39     ` Andrii Nakryiko
  2019-03-08  0:26   ` Arnaldo Carvalho de Melo
  1 sibling, 2 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-07 14:09 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: arnaldo.melo, andrii.nakryiko, ast, yhs, dwarves, bpf

Em Thu, Mar 07, 2019 at 11:02:47AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Wed, Mar 06, 2019 at 04:23:21PM -0800, Andrii Nakryiko escreveu:
> > Due to this assumption, libdwarves and other parts of pahole are using 16-bit
> > counters to iterate over entities within CU. This can cause infinite loop when
> > iterating BTF data, if there are more than 65535 types. This patch changes
> > non-public variables to use 32-bit integers, where appropriate.
 
> Ok, there are some bits that are unrelated, I'm comment on it and remove
> before applying.

Ooops, see below, I'm removing the non-type related parts, and will look
at changing the types till btfdiff with that allyesconfig aarch64
vmlinux image shows no diffs.

- Arnaldo
  
> - Arnaldo
>  
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
> >  btf_loader.c      |  4 ++--
> >  dwarves.c         | 16 ++++++++--------
> >  dwarves_fprintf.c |  8 ++++----
> >  pahole.c          | 36 ++++++++++++++++++------------------
> >  4 files changed, 32 insertions(+), 32 deletions(-)
> > 
> > diff --git a/btf_loader.c b/btf_loader.c
> > index 867c608..3de774a 100644
> > --- a/btf_loader.c
> > +++ b/btf_loader.c
> > @@ -45,7 +45,7 @@ static void *tag__alloc(const size_t size)
> >  	return tag;
> >  }
> >  
> > -static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint16_t tag,
> > +static int btf_elf__load_ftype(struct btf_elf *btfe, struct ftype *proto, uint32_t tag,
> >  			       uint32_t type, uint16_t vlen, struct btf_param *args, long id)
> >  {
> >  	int i;
> > @@ -100,7 +100,7 @@ static struct base_type *base_type__new(strings_t name, uint32_t attrs,
> >  	return bt;
> >  }
> >  
> > -static void type__init(struct type *type, uint16_t tag,
> > +static void type__init(struct type *type, uint32_t tag,
> >  		       strings_t name, size_t size)
> >  {
> >  	INIT_LIST_HEAD(&type->node);
> > diff --git a/dwarves.c b/dwarves.c
> > index 111797b..3594ae3 100644
> > --- a/dwarves.c
> > +++ b/dwarves.c
> > @@ -349,7 +349,7 @@ reevaluate:
> >  
> >  static void cu__find_class_holes(struct cu *cu)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct class *pos;
> >  
> >  	cu__for_each_struct(cu, id, pos)
> > @@ -566,7 +566,7 @@ struct tag *cu__type(const struct cu *cu, const uint16_t id)
> >  struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
> >  					   const uint16_t type)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  
> >  	if (cu == NULL || type == 0)
> > @@ -582,7 +582,7 @@ struct tag *cu__find_first_typedef_of_type(const struct cu *cu,
> >  struct tag *cu__find_base_type_by_name(const struct cu *cu,
> >  				       const char *name, uint16_t *idp)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  
> >  	if (cu == NULL || name == NULL)
> > @@ -611,7 +611,7 @@ struct tag *cu__find_base_type_by_sname_and_size(const struct cu *cu,
> >  						 uint16_t bit_size,
> >  						 uint16_t *idp)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  
> >  	if (sname == 0)
> > @@ -638,7 +638,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
> >  						   uint16_t bit_size,
> >  						   uint16_t *idp)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  
> >  	if (sname == 0)
> > @@ -663,7 +663,7 @@ struct tag *cu__find_enumeration_by_sname_and_size(const struct cu *cu,
> >  struct tag *cu__find_struct_by_sname(const struct cu *cu, strings_t sname,
> >  				     const int include_decls, uint16_t *idp)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  
> >  	if (sname == 0)
> > @@ -699,7 +699,7 @@ static struct tag *__cu__find_struct_by_name(const struct cu *cu, const char *na
> >  	if (cu == NULL || name == NULL)
> >  		return NULL;
> >  
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  	cu__for_each_type(cu, id, pos) {
> >  		struct type *type;
> > @@ -1128,7 +1128,7 @@ void lexblock__add_label(struct lexblock *block, struct label *label)
> >  }
> >  
> >  const struct class_member *class__find_bit_hole(const struct class *class,
> > -					    const struct class_member *trailer,
> > +						const struct class_member *trailer,

This one is unrelated, so I'll remove from this patch.

> >  						const uint16_t bit_hole_size)
> >  {
> >  	struct class_member *pos;
> > diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c
> > index 490dc30..530c27b 100644
> > --- a/dwarves_fprintf.c
> > +++ b/dwarves_fprintf.c
> > @@ -163,7 +163,7 @@ static const char *tag__accessibility(const struct tag *tag)
> >  	return NULL;
> >  }
> >  
> > -static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint16_t id,
> > +static size_t __tag__id_not_found_snprintf(char *bf, size_t len, uint32_t id,
> >  					   const char *fn, int line)
> >  {
> >  	return snprintf(bf, len, "<ERROR(%s:%d): %#llx not found!>", fn, line,
> > @@ -425,7 +425,7 @@ static const char *__tag__name(const struct tag *tag, const struct cu *cu,
> >  		return tag__ptr_name(tag, cu, bf, len, "&");
> >  	case DW_TAG_ptr_to_member_type: {
> >  		char suffix[512];
> > -		uint16_t id = tag__ptr_to_member_type(tag)->containing_type;
> > +		uint32_t id = tag__ptr_to_member_type(tag)->containing_type;

Not strictly need as we haven't changed ptr_to_member_type's 'containing_type'
to uint32_t, it remains uint16_t as it is part of the ABI, right?
 
> >  		type = cu__type(cu, id);
> >  		if (type != NULL)
> > @@ -1213,7 +1213,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
> >  	struct type *type = &class->type;
> >  	size_t last_size = 0, size;
> >  	uint8_t newline = 0;
> > -	uint16_t nr_paddings = 0;
> > +	uint32_t nr_paddings = 0;

No need to convert this one as this is not a type id?

> >  	uint32_t sum = 0;
> >  	uint32_t sum_holes = 0;
> >  	uint32_t sum_paddings = 0;
> > @@ -1225,7 +1225,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
> >  	struct tag *tag_pos;
> >  	const char *current_accessibility = NULL;
> >  	struct conf_fprintf cconf = conf ? *conf : conf_fprintf__defaults;
> > -	const uint16_t t = type->namespace.tag.tag;
> > +	const uint32_t t = type->namespace.tag.tag;

Ditto

> >  	size_t printed = fprintf(fp, "%s%s%s%s%s",
> >  				 cconf.prefix ?: "", cconf.prefix ? " " : "",
> >  				 ((cconf.classes_as_structs ||
> > diff --git a/pahole.c b/pahole.c
> > index 2230e37..bec279c 100644
> > --- a/pahole.c
> > +++ b/pahole.c
> > @@ -41,9 +41,9 @@ static size_t cu__exclude_prefix_len;
> >  static char *decl_exclude_prefix;
> >  static size_t decl_exclude_prefix_len;
> >  
> > -static uint16_t nr_holes;
> > -static uint16_t nr_bit_holes;
> > -static uint16_t hole_size_ge;
> > +static uint32_t nr_holes;
> > +static uint32_t nr_bit_holes;
> > +static uint32_t hole_size_ge;

Ditto for these 3

> >  static uint8_t show_packable;
> >  static uint8_t global_verbose;
> >  static uint8_t recursive;
> > @@ -156,7 +156,7 @@ static void nr_definitions_formatter(struct structure *st)
> >  }
> >  
> >  static void nr_members_formatter(struct class *class,
> > -				 struct cu *cu, uint16_t id __unused)
> > +				 struct cu *cu, uint32_t id __unused)
> >  {
> >  	printf("%s%c%u\n", class__name(class, cu), separator,
> >  	       class__nr_members(class));
> > @@ -168,26 +168,26 @@ static void nr_methods_formatter(struct structure *st)
> >  }
> >  
> >  static void size_formatter(struct class *class,
> > -			   struct cu *cu, uint16_t id __unused)
> > +			   struct cu *cu, uint32_t id __unused)
> >  {
> >  	printf("%s%c%d%c%u\n", class__name(class, cu), separator,
> >  	       class__size(class), separator, class->nr_holes);
> >  }
> >  
> >  static void class_name_len_formatter(struct class *class, struct cu *cu,
> > -				     uint16_t id __unused)
> > +				     uint32_t id __unused)
> >  {
> >  	const char *name = class__name(class, cu);
> >  	printf("%s%c%zd\n", name, separator, strlen(name));
> >  }
> >  
> >  static void class_name_formatter(struct class *class,
> > -				 struct cu *cu, uint16_t id __unused)
> > +				 struct cu *cu, uint32_t id __unused)
> >  {
> >  	puts(class__name(class, cu));
> >  }
> >  
> > -static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
> > +static void class_formatter(struct class *class, struct cu *cu, uint32_t id)
> >  {
> >  	struct tag *typedef_alias = NULL;
> >  	struct tag *tag = class__tag(class);
> > @@ -224,7 +224,7 @@ static void class_formatter(struct class *class, struct cu *cu, uint16_t id)
> >  	putchar('\n');
> >  }
> >  
> > -static void print_packable_info(struct class *c, struct cu *cu, uint16_t id)
> > +static void print_packable_info(struct class *c, struct cu *cu, uint32_t id)
> >  {
> >  	const struct tag *t = class__tag(c);
> >  	const size_t orig_size = class__size(c);
> > @@ -267,14 +267,14 @@ static void print_stats(void)
> >  }
> >  
> >  static struct class *class__filter(struct class *class, struct cu *cu,
> > -				   uint16_t tag_id);
> > +				   uint32_t tag_id);
> >  
> >  static void (*formatter)(struct class *class,
> > -			 struct cu *cu, uint16_t id) = class_formatter;
> > +			 struct cu *cu, uint32_t id) = class_formatter;
> >  
> >  static void print_classes(struct cu *cu)
> >  {
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct class *pos;
> >  
> >  	cu__for_each_struct_or_union(cu, id, pos) {
> > @@ -352,7 +352,7 @@ static int class__packable(struct class *class, struct cu *cu)
> >  }
> >  
> >  static struct class *class__filter(struct class *class, struct cu *cu,
> > -				   uint16_t tag_id)
> > +				   uint32_t tag_id)
> >  {
> >  	struct tag *tag = class__tag(class);
> >  	const char *name;
> > @@ -608,7 +608,7 @@ static void cu_fixup_word_size_iterator(struct cu *cu)
> >  	original_word_size = cu->addr_size;
> >  	cu->addr_size = word_size;
> >  
> > -	uint16_t id;
> > +	uint32_t id;
> >  	struct tag *pos;
> >  	cu__for_each_type(cu, id, pos)
> >  		tag__fixup_word_size(pos, cu);
> > @@ -659,11 +659,11 @@ static void cu__account_nr_methods(struct cu *cu)
> >  
> >  static char tab[128];
> >  
> > -static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
> > +static void print_structs_with_pointer_to(const struct cu *cu, uint32_t type)
> >  {
> >  	struct class *pos;
> >  	struct class_member *pos_member;
> > -	uint16_t id;
> > +	uint32_t id;
> >  
> >  	cu__for_each_struct(cu, id, pos) {
> >  		bool looked = false;
> > @@ -702,10 +702,10 @@ static void print_structs_with_pointer_to(const struct cu *cu, uint16_t type)
> >  	}
> >  }
> >  
> > -static void print_containers(const struct cu *cu, uint16_t type, int ident)
> > +static void print_containers(const struct cu *cu, uint32_t type, int ident)
> >  {
> >  	struct class *pos;
> > -	uint16_t id;
> > +	uint32_t id;
> >  
> >  	cu__for_each_struct(cu, id, pos) {
> >  		if (pos->type.namespace.name == 0)
> > -- 
> > 2.17.1
> 
> -- 
> 
> - Arnaldo

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-07 14:09   ` Arnaldo Carvalho de Melo
@ 2019-03-07 14:11     ` Arnaldo Carvalho de Melo
  2019-03-08 19:39     ` Andrii Nakryiko
  1 sibling, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-07 14:11 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: arnaldo.melo, andrii.nakryiko, ast, yhs, dwarves, bpf

Em Thu, Mar 07, 2019 at 11:09:17AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Mar 07, 2019 at 11:02:47AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Wed, Mar 06, 2019 at 04:23:21PM -0800, Andrii Nakryiko escreveu:
> > > Due to this assumption, libdwarves and other parts of pahole are using 16-bit
> > > counters to iterate over entities within CU. This can cause infinite loop when
> > > iterating BTF data, if there are more than 65535 types. This patch changes
> > > non-public variables to use 32-bit integers, where appropriate.
>  
> > Ok, there are some bits that are unrelated, I'm comment on it and remove
> > before applying.
> 
> Ooops, see below, I'm removing the non-type related parts, and will look
> at changing the types till btfdiff with that allyesconfig aarch64
> vmlinux image shows no diffs.

This one as well is not needed:

@@ -1225,7 +1225,7 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
        struct tag *tag_pos;
        const char *current_accessibility = NULL;
        struct conf_fprintf cconf = conf ? *conf : conf_fprintf__defaults;
-       const uint16_t t = type->namespace.tag.tag;
+       const uint32_t t = type->namespace.tag.tag;
        size_t printed = fprintf(fp, "%s%s%s%s%s",
                                 cconf.prefix ?: "", cconf.prefix ? " " : "",
                                 ((cconf.classes_as_structs ||


As this is just for the type of the tag, not its type, i.e. its about
DW_TAG_struct_type, etc.

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-07 14:02 ` Arnaldo Carvalho de Melo
  2019-03-07 14:09   ` Arnaldo Carvalho de Melo
@ 2019-03-08  0:26   ` Arnaldo Carvalho de Melo
  2019-03-08 18:45     ` Arnaldo Carvalho de Melo
  1 sibling, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-08  0:26 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, Song Liu,
	Martin Lau, Jiri Olsa, Namhyung Kim, dwarves, bpf

Em Thu, Mar 07, 2019 at 11:02:47AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Wed, Mar 06, 2019 at 04:23:21PM -0800, Andrii Nakryiko escreveu:
> > Existing code base assumes that single CU doesn't have more than 65535 types
> > per each CU, which might be a reasonable assumption for DWARF data. With BTF,
> > though, all we get is single, potentially huge, CU which can easily have more
> > than 65k types. For example, this is the case for allyesconfig version of
> > Linux kernel, which has >200k types.
> 
> And your patch seems to be a step in removing this limitation, thanks
> for working on it.
>  
> > Due to this assumption, libdwarves and other parts of pahole are using 16-bit
> > counters to iterate over entities within CU. This can cause infinite loop when
> > iterating BTF data, if there are more than 65535 types. This patch changes
> > non-public variables to use 32-bit integers, where appropriate.

So, I've worked on this today and pushed a tmp branch for you guys to
check, please try what is in:

git://git.kernel.org/pub/scm/devel/pahole/pahole.git  branch tmp.type_id_t-as-uint32_t

Since I had to change the type in several functions and structs, I'll
bump the soname to have a libdwarves2, etc.

With it, and with that vmlinux.aarch64 built with allmodconfig, we're
down to these cases, and I re-run 'pahoje -J examples/vmlinux-aarch64':

[acme@quaco pahole]$ file examples/vmlinux-aarch64
examples/vmlinux-aarch64: ELF 64-bit MSB pie executable, ARM aarch64, version 1 (SYSV), statically linked, BuildID[sha1]=7fcff18ec214eee2f8c6b288fca21dc6d9771ad6, with debug_info, not stripped
[acme@quaco pahole]$ size examples/vmlinux-aarch64
   text	   data	    bss	    dec	    hex	filename
238902279	138150199	28423136	405475614	182b111e	examples/vmlinux-aarch64
[acme@quaco pahole]$ ls -lah examples/vmlinux-aarch64
-rwxrwxr-x. 1 acme acme 2.3G Mar  7 18:17 examples/vmlinux-aarch64
[acme@quaco pahole]$ pahole -F btf --sizes examples/vmlinux-aarch64 | wc -l
51023
[acme@quaco pahole]$

Wow! 51023 unique structs and unions, w00t :-)

Several looks similar and the low hanging fruit to investigate, seems to
be enum bitfields, and the others may as well end up being the same, in
miscalculated stats for structs embedded in other structs:

$ btfdiff examples/vmlinux-aarch64
--- /tmp/btfdiff.dwarf.81KCPb	2019-03-07 18:20:13.153319625 -0300
+++ /tmp/btfdiff.btf.g1QkcZ	2019-03-07 18:20:13.928328675 -0300
@@ -306626,18 +306626,15 @@ struct myrb_enquiry2 {
 			MYRB_RAM_TYPE_EDO = 1,
 			MYRB_RAM_TYPE_SDRAM = 2,
 			MYRB_RAM_TYPE_Last = 7,
-		} ram:3;                                   /*    40     4 */
+		} ram:3;                                   /*    40     1 */
 		enum {
 			MYRB_ERR_CORR_None = 0,
 			MYRB_ERR_CORR_Parity = 1,
 			MYRB_ERR_CORR_ECC = 2,
 			MYRB_ERR_CORR_Last = 7,
-		} ec:3;                                    /*    40     4 */
+		} ec:3;                                    /*    40     1 */
 		unsigned char      fast_page:1;          /*    40: 6  1 */
 		unsigned char      low_power:1;          /*    40: 7  1 */
-
-		/* Bitfield combined with next fields */
-
 		unsigned char      rsvd4;                /*    41     1 */
 	} mem_type;                                      /*    40     2 */
 	short unsigned int         clock_speed;          /*    42     2 */
@@ -306668,18 +306665,15 @@ struct myrb_enquiry2 {
 			MYRB_WIDTH_NARROW_8BIT = 0,
 			MYRB_WIDTH_WIDE_16BIT = 1,
 			MYRB_WIDTH_WIDE_32BIT = 2,
-		} bus_width:2;                             /*   106     4 */
+		} bus_width:2;                             /*   106     1 */
 		enum {
 			MYRB_SCSI_SPEED_FAST = 0,
 			MYRB_SCSI_SPEED_ULTRA = 1,
 			MYRB_SCSI_SPEED_ULTRA2 = 2,
-		} bus_speed:2;                             /*   106     4 */
+		} bus_speed:2;                             /*   106     1 */
 		unsigned char      differential:1;       /*   106: 4  1 */
 		unsigned char      rsvd10:3;             /*   106: 5  1 */
 	} scsi_cap;                                      /*   106     1 */
-
-	/* XXX last struct has 65533 bytes of padding */
-
 	unsigned char              rsvd11[5];            /*   107     5 */
 	short unsigned int         fw_build;             /*   112     2 */
 	enum {
@@ -306701,7 +306695,6 @@ struct myrb_enquiry2 {
 	unsigned char              rsvd14[8];            /*   120     8 */
 
 	/* size: 128, cachelines: 2, members: 46 */
-	/* paddings: 1, sum paddings: 65533 */
 };
 struct myrb_ldev_info {
 	unsigned int               size;                 /*     0     4 */
@@ -306741,7 +306734,7 @@ struct myrb_pdev_state {
 		MYRB_TYPE_DISK = 1,
 		MYRB_TYPE_TAPE = 2,
 		MYRB_TYPE_CDROM_OR_WORM = 3,
-	} devtype:2;                                       /*     1     4 */
+	} devtype:2;                                       /*     1     1 */
 
 	/* Bitfield combined with previous fields */
 
@@ -306868,7 +306861,7 @@ struct myrb_config2 {
 			MYRB_SPEED_SYNC_8MHz = 1,
 			MYRB_SPEED_SYNC_5MHz = 2,
 			MYRB_SPEED_SYNC_10_OR_20MHz = 3,
-		} speed:2;                                 /*    12     4 */
+		} speed:2;                                 /*    12     1 */
 		unsigned int       force_8bit:1;         /*    12: 2  4 */
 		unsigned int       disable_fast20:1;     /*    12: 3  4 */
 		unsigned int       rsvd8:3;              /*    12: 4  4 */
@@ -306891,7 +306884,7 @@ struct myrb_config2 {
 		MYRB_GEOM_255_63 = 1,
 		MYRB_GEOM_RESERVED1 = 2,
 		MYRB_GEOM_RESERVED2 = 3,
-	} drive_geometry:2;                                /*    52     4 */
+	} drive_geometry:2;                                /*    52     1 */
 	unsigned int               rsvd12:1;             /*    52: 7  4 */
 
 	/* Bitfield combined with next fields */
@@ -306913,7 +306906,7 @@ struct myrb_dcdb {
 		MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
 		MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
 		MYRB_DCDB_XFER_ILLEGAL = 3,
-	} data_xfer:2;                                     /*     1     4 */
+	} data_xfer:2;                                     /*     1     1 */
 
 	/* Bitfield combined with previous fields */
 
@@ -306928,7 +306921,7 @@ struct myrb_dcdb {
 		MYRB_DCDB_TMO_10_SECS = 1,
 		MYRB_DCDB_TMO_60_SECS = 2,
 		MYRB_DCDB_TMO_10_MINS = 3,
-	} timeout:2;                                       /*     1     4 */
+	} timeout:2;                                       /*     1     1 */
 
 	/* Bitfield combined with previous fields */
 
@@ -307053,7 +307046,7 @@ union myrb_cmd_mbox {
 			MYRB_SGL_ADDR32_COUNT16 = 1,
 			MYRB_SGL_COUNT32_ADDR32 = 2,
 			MYRB_SGL_COUNT16_ADDR32 = 3,
-		} sg_type:2;                             /*    12     4 */
+		} sg_type:2;                             /*    12     1 */
 		unsigned char      rsvd[3];            /*    13     3 */
 	} type5;                                       /*     0    16 */
 	struct {
@@ -453415,15 +453408,10 @@ struct ieee80211_tx_rate {
 	/* Bitfield combined with previous fields */
 
 	u16                        count:5;              /*     0: 8  2 */
-
-	/* XXX 3 bits hole, try to pack */
-	/* Bitfield combined with next fields */
-
-	u16                        flags:11;             /*     1: 5  2 */
+	u16                        flags:11;             /*     0:13  2 */
 
 	/* size: 3, cachelines: 1, members: 3 */
-	/* bit holes: 1, sum bit holes: 3 bits */
-	/* bit_padding: 5 bits */
+	/* padding: 1 */
 	/* last cacheline: 3 bytes */
 };
 struct ieee80211_key_conf {
@@ -488743,6 +488731,9 @@ struct ieee80211_tx_rate_control {
 	struct ieee80211_bss_conf * bss_conf;            /*    16     8 */
 	struct sk_buff *           skb;                  /*    24     8 */
 	struct ieee80211_tx_rate   reported_rate;        /*    32     3 */
+
+	/* XXX last struct has 1 byte of padding */
+
 	bool                       rts;                  /*    35     1 */
 	bool                       short_preamble;       /*    36     1 */
 
@@ -488758,6 +488749,7 @@ struct ieee80211_tx_rate_control {
 	/* size: 64, cachelines: 1, members: 10 */
 	/* sum members: 50, holes: 2, sum holes: 7 */
 	/* padding: 7 */
+	/* paddings: 1, sum paddings: 1 */
 };
 struct rate_control_ops {
 	long unsigned int          capa;                 /*     0     8 */
@@ -528758,7 +528750,7 @@ struct skb_pool {
 };
 struct vc_map {
 	volatile unsigned int      tx:1;                 /*     0: 0  4 */
-	volatile unsigned int      rx:1;                 /*     0: 1  4 */
+	volatile unsigned int      rx:1;                 /*     0: 0  4 */
 
 	/* XXX 30 bits hole, try to pack */
 	/* XXX 4 bytes hole, try to pack */
@@ -675819,7 +675811,10 @@ union zip_zres_s {
 		u64                exbits:7;           /*    16:41  8 */
 		u64                reserved_137_143:7; /*    16:48  8 */
 		u64                ef:1;               /*    16:55  8 */
-		volatile u64       compcode:8;         /*    16:56  8 */
+
+		/* Bitfield combined with next fields */
+
+		volatile u64       compcode:8;         /*    23: 0  8 */
 	} s;                                           /*     0    24 */
 };
 struct sg_info {
@@ -784966,12 +784961,14 @@ struct ieee80211_tx_data {
 	struct ieee80211_key *     key;                  /*   128     8 */
 	struct ieee80211_tx_rate   rate;                 /*   136     3 */
 
+	/* XXX last struct has 1 byte of padding */
 	/* XXX 1 byte hole, try to pack */
 
 	unsigned int               flags;                /*   140     4 */
 
 	/* size: 144, cachelines: 3, members: 8 */
 	/* sum members: 143, holes: 1, sum holes: 1 */
+	/* paddings: 1, sum paddings: 1 */
 	/* last cacheline: 16 bytes */
 };
 struct ieee80211_rx_data {


For a more usual vmlinux, with a .config similar to the one in a fedora
distro, we get everything seemingly ok:

[acme@quaco pahole]$ btfdiff vmlinux
[acme@quaco pahole]$ file vmlinux
vmlinux: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=e7b4562926e9f69b2f478a3b4a7dd33136f42f5d, with debug_info, not stripped
[acme@quaco pahole]$ size vmlinux
   text	   data	    bss	    dec	    hex	filename
17124425	7038546	5378112	29541083	1c2c2db	vmlinux
[acme@quaco pahole]$ ls -lah vmlinux
-rwxrwxr-x. 1 acme acme 621M Mar  7 15:40 vmlinux
[acme@quaco pahole]$ pahole -F btf --sizes vmlinux | wc -l
7472
[acme@quaco pahole]$ pahole -F dwarf --sizes vmlinux | wc -l
7445
[acme@quaco pahole]$ pahole -F dwarf --show_private_classes --sizes vmlinux | wc -l
7472
[acme@quaco pahole]$
[acme@quaco pahole]$ pahole -F btf -C perf_event_attr vmlinux
struct perf_event_attr {
	__u32                      type;                 /*     0     4 */
	__u32                      size;                 /*     4     4 */
	__u64                      config;               /*     8     8 */
	union {
		__u64              sample_period;        /*    16     8 */
		__u64              sample_freq;          /*    16     8 */
	};                                               /*    16     8 */
	__u64                      sample_type;          /*    24     8 */
	__u64                      read_format;          /*    32     8 */
	__u64                      disabled:1;           /*    40:63  8 */
	__u64                      inherit:1;            /*    40:62  8 */
	__u64                      pinned:1;             /*    40:61  8 */
	__u64                      exclusive:1;          /*    40:60  8 */
	__u64                      exclude_user:1;       /*    40:59  8 */
	__u64                      exclude_kernel:1;     /*    40:58  8 */
	__u64                      exclude_hv:1;         /*    40:57  8 */
	__u64                      exclude_idle:1;       /*    40:56  8 */
	__u64                      mmap:1;               /*    40:55  8 */
	__u64                      comm:1;               /*    40:54  8 */
	__u64                      freq:1;               /*    40:53  8 */
	__u64                      inherit_stat:1;       /*    40:52  8 */
	__u64                      enable_on_exec:1;     /*    40:51  8 */
	__u64                      task:1;               /*    40:50  8 */
	__u64                      watermark:1;          /*    40:49  8 */
	__u64                      precise_ip:2;         /*    40:47  8 */
	__u64                      mmap_data:1;          /*    40:46  8 */
	__u64                      sample_id_all:1;      /*    40:45  8 */
	__u64                      exclude_host:1;       /*    40:44  8 */
	__u64                      exclude_guest:1;      /*    40:43  8 */
	__u64                      exclude_callchain_kernel:1; /*    40:42  8 */
	__u64                      exclude_callchain_user:1; /*    40:41  8 */
	__u64                      mmap2:1;              /*    40:40  8 */
	__u64                      comm_exec:1;          /*    40:39  8 */
	__u64                      use_clockid:1;        /*    40:38  8 */
	__u64                      context_switch:1;     /*    40:37  8 */
	__u64                      write_backward:1;     /*    40:36  8 */
	__u64                      namespaces:1;         /*    40:35  8 */
	__u64                      ksymbol:1;            /*    40:34  8 */
	__u64                      bpf_event:1;          /*    40:33  8 */
	__u64                      __reserved_1:33;      /*    40: 0  8 */
	union {
		__u32              wakeup_events;        /*    48     4 */
		__u32              wakeup_watermark;     /*    48     4 */
	};                                               /*    48     4 */
	__u32                      bp_type;              /*    52     4 */
	union {
		__u64              bp_addr;              /*    56     8 */
		__u64              kprobe_func;          /*    56     8 */
		__u64              uprobe_path;          /*    56     8 */
		__u64              config1;              /*    56     8 */
	};                                               /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	union {
		__u64              bp_len;               /*    64     8 */
		__u64              kprobe_addr;          /*    64     8 */
		__u64              probe_offset;         /*    64     8 */
		__u64              config2;              /*    64     8 */
	};                                               /*    64     8 */
	__u64                      branch_sample_type;   /*    72     8 */
	__u64                      sample_regs_user;     /*    80     8 */
	__u32                      sample_stack_user;    /*    88     4 */
	__s32                      clockid;              /*    92     4 */
	__u64                      sample_regs_intr;     /*    96     8 */
	__u32                      aux_watermark;        /*   104     4 */
	__u16                      sample_max_stack;     /*   108     2 */
	__u16                      __reserved_2;         /*   110     2 */

	/* size: 112, cachelines: 2, members: 49 */
	/* last cacheline: 48 bytes */
};
[acme@quaco pahole]$

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-08  0:26   ` Arnaldo Carvalho de Melo
@ 2019-03-08 18:45     ` Arnaldo Carvalho de Melo
  2019-03-08 19:42       ` Andrii Nakryiko
  0 siblings, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-08 18:45 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, Song Liu,
	Martin Lau, Jiri Olsa, Namhyung Kim, dwarves, bpf

Em Thu, Mar 07, 2019 at 09:26:37PM -0300, Arnaldo Carvalho de Melo escreveu:
> Several looks similar and the low hanging fruit to investigate, seems to
> be enum bitfields, and the others may as well end up being the same, in
> miscalculated stats for structs embedded in other structs:

I had little time to further look into this, but from what I've seen the
good news is that it seems the problem is with the DWARF loader, the BTF
one producing the right results 8-) I'll take a look at a fix you made
for packed enums and probably use the same thing on the DWARF loader.

- Arnaldo
 
> $ btfdiff examples/vmlinux-aarch64
> --- /tmp/btfdiff.dwarf.81KCPb	2019-03-07 18:20:13.153319625 -0300
> +++ /tmp/btfdiff.btf.g1QkcZ	2019-03-07 18:20:13.928328675 -0300
> @@ -306626,18 +306626,15 @@ struct myrb_enquiry2 {
>  			MYRB_RAM_TYPE_EDO = 1,
>  			MYRB_RAM_TYPE_SDRAM = 2,
>  			MYRB_RAM_TYPE_Last = 7,
> -		} ram:3;                                   /*    40     4 */
> +		} ram:3;                                   /*    40     1 */
>  		enum {
>  			MYRB_ERR_CORR_None = 0,
>  			MYRB_ERR_CORR_Parity = 1,
>  			MYRB_ERR_CORR_ECC = 2,
>  			MYRB_ERR_CORR_Last = 7,
> -		} ec:3;                                    /*    40     4 */
> +		} ec:3;                                    /*    40     1 */
>  		unsigned char      fast_page:1;          /*    40: 6  1 */
>  		unsigned char      low_power:1;          /*    40: 7  1 */
> -
> -		/* Bitfield combined with next fields */
> -
>  		unsigned char      rsvd4;                /*    41     1 */
>  	} mem_type;                                      /*    40     2 */
>  	short unsigned int         clock_speed;          /*    42     2 */
> @@ -306668,18 +306665,15 @@ struct myrb_enquiry2 {
>  			MYRB_WIDTH_NARROW_8BIT = 0,
>  			MYRB_WIDTH_WIDE_16BIT = 1,
>  			MYRB_WIDTH_WIDE_32BIT = 2,
> -		} bus_width:2;                             /*   106     4 */
> +		} bus_width:2;                             /*   106     1 */
>  		enum {
>  			MYRB_SCSI_SPEED_FAST = 0,
>  			MYRB_SCSI_SPEED_ULTRA = 1,
>  			MYRB_SCSI_SPEED_ULTRA2 = 2,
> -		} bus_speed:2;                             /*   106     4 */
> +		} bus_speed:2;                             /*   106     1 */
>  		unsigned char      differential:1;       /*   106: 4  1 */
>  		unsigned char      rsvd10:3;             /*   106: 5  1 */
>  	} scsi_cap;                                      /*   106     1 */
> -
> -	/* XXX last struct has 65533 bytes of padding */
> -
>  	unsigned char              rsvd11[5];            /*   107     5 */
>  	short unsigned int         fw_build;             /*   112     2 */
>  	enum {
> @@ -306701,7 +306695,6 @@ struct myrb_enquiry2 {
>  	unsigned char              rsvd14[8];            /*   120     8 */
>  
>  	/* size: 128, cachelines: 2, members: 46 */
> -	/* paddings: 1, sum paddings: 65533 */
>  };
>  struct myrb_ldev_info {
>  	unsigned int               size;                 /*     0     4 */
> @@ -306741,7 +306734,7 @@ struct myrb_pdev_state {
>  		MYRB_TYPE_DISK = 1,
>  		MYRB_TYPE_TAPE = 2,
>  		MYRB_TYPE_CDROM_OR_WORM = 3,
> -	} devtype:2;                                       /*     1     4 */
> +	} devtype:2;                                       /*     1     1 */
>  
>  	/* Bitfield combined with previous fields */
>  
> @@ -306868,7 +306861,7 @@ struct myrb_config2 {
>  			MYRB_SPEED_SYNC_8MHz = 1,
>  			MYRB_SPEED_SYNC_5MHz = 2,
>  			MYRB_SPEED_SYNC_10_OR_20MHz = 3,
> -		} speed:2;                                 /*    12     4 */
> +		} speed:2;                                 /*    12     1 */
>  		unsigned int       force_8bit:1;         /*    12: 2  4 */
>  		unsigned int       disable_fast20:1;     /*    12: 3  4 */
>  		unsigned int       rsvd8:3;              /*    12: 4  4 */
> @@ -306891,7 +306884,7 @@ struct myrb_config2 {
>  		MYRB_GEOM_255_63 = 1,
>  		MYRB_GEOM_RESERVED1 = 2,
>  		MYRB_GEOM_RESERVED2 = 3,
> -	} drive_geometry:2;                                /*    52     4 */
> +	} drive_geometry:2;                                /*    52     1 */
>  	unsigned int               rsvd12:1;             /*    52: 7  4 */
>  
>  	/* Bitfield combined with next fields */
> @@ -306913,7 +306906,7 @@ struct myrb_dcdb {
>  		MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
>  		MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
>  		MYRB_DCDB_XFER_ILLEGAL = 3,
> -	} data_xfer:2;                                     /*     1     4 */
> +	} data_xfer:2;                                     /*     1     1 */
>  
>  	/* Bitfield combined with previous fields */
>  
> @@ -306928,7 +306921,7 @@ struct myrb_dcdb {
>  		MYRB_DCDB_TMO_10_SECS = 1,
>  		MYRB_DCDB_TMO_60_SECS = 2,
>  		MYRB_DCDB_TMO_10_MINS = 3,
> -	} timeout:2;                                       /*     1     4 */
> +	} timeout:2;                                       /*     1     1 */
>  
>  	/* Bitfield combined with previous fields */
>  
> @@ -307053,7 +307046,7 @@ union myrb_cmd_mbox {
>  			MYRB_SGL_ADDR32_COUNT16 = 1,
>  			MYRB_SGL_COUNT32_ADDR32 = 2,
>  			MYRB_SGL_COUNT16_ADDR32 = 3,
> -		} sg_type:2;                             /*    12     4 */
> +		} sg_type:2;                             /*    12     1 */
>  		unsigned char      rsvd[3];            /*    13     3 */
>  	} type5;                                       /*     0    16 */
>  	struct {
> @@ -453415,15 +453408,10 @@ struct ieee80211_tx_rate {
>  	/* Bitfield combined with previous fields */
>  
>  	u16                        count:5;              /*     0: 8  2 */
> -
> -	/* XXX 3 bits hole, try to pack */
> -	/* Bitfield combined with next fields */
> -
> -	u16                        flags:11;             /*     1: 5  2 */
> +	u16                        flags:11;             /*     0:13  2 */
>  
>  	/* size: 3, cachelines: 1, members: 3 */
> -	/* bit holes: 1, sum bit holes: 3 bits */
> -	/* bit_padding: 5 bits */
> +	/* padding: 1 */
>  	/* last cacheline: 3 bytes */
>  };
>  struct ieee80211_key_conf {
> @@ -488743,6 +488731,9 @@ struct ieee80211_tx_rate_control {
>  	struct ieee80211_bss_conf * bss_conf;            /*    16     8 */
>  	struct sk_buff *           skb;                  /*    24     8 */
>  	struct ieee80211_tx_rate   reported_rate;        /*    32     3 */
> +
> +	/* XXX last struct has 1 byte of padding */
> +
>  	bool                       rts;                  /*    35     1 */
>  	bool                       short_preamble;       /*    36     1 */
>  
> @@ -488758,6 +488749,7 @@ struct ieee80211_tx_rate_control {
>  	/* size: 64, cachelines: 1, members: 10 */
>  	/* sum members: 50, holes: 2, sum holes: 7 */
>  	/* padding: 7 */
> +	/* paddings: 1, sum paddings: 1 */
>  };
>  struct rate_control_ops {
>  	long unsigned int          capa;                 /*     0     8 */
> @@ -528758,7 +528750,7 @@ struct skb_pool {
>  };
>  struct vc_map {
>  	volatile unsigned int      tx:1;                 /*     0: 0  4 */
> -	volatile unsigned int      rx:1;                 /*     0: 1  4 */
> +	volatile unsigned int      rx:1;                 /*     0: 0  4 */
>  
>  	/* XXX 30 bits hole, try to pack */
>  	/* XXX 4 bytes hole, try to pack */
> @@ -675819,7 +675811,10 @@ union zip_zres_s {
>  		u64                exbits:7;           /*    16:41  8 */
>  		u64                reserved_137_143:7; /*    16:48  8 */
>  		u64                ef:1;               /*    16:55  8 */
> -		volatile u64       compcode:8;         /*    16:56  8 */
> +
> +		/* Bitfield combined with next fields */
> +
> +		volatile u64       compcode:8;         /*    23: 0  8 */
>  	} s;                                           /*     0    24 */
>  };
>  struct sg_info {
> @@ -784966,12 +784961,14 @@ struct ieee80211_tx_data {
>  	struct ieee80211_key *     key;                  /*   128     8 */
>  	struct ieee80211_tx_rate   rate;                 /*   136     3 */
>  
> +	/* XXX last struct has 1 byte of padding */
>  	/* XXX 1 byte hole, try to pack */
>  
>  	unsigned int               flags;                /*   140     4 */
>  
>  	/* size: 144, cachelines: 3, members: 8 */
>  	/* sum members: 143, holes: 1, sum holes: 1 */
> +	/* paddings: 1, sum paddings: 1 */
>  	/* last cacheline: 16 bytes */
>  };
>  struct ieee80211_rx_data {
> 
> 
> For a more usual vmlinux, with a .config similar to the one in a fedora
> distro, we get everything seemingly ok:
> 
> [acme@quaco pahole]$ btfdiff vmlinux
> [acme@quaco pahole]$ file vmlinux
> vmlinux: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=e7b4562926e9f69b2f478a3b4a7dd33136f42f5d, with debug_info, not stripped
> [acme@quaco pahole]$ size vmlinux
>    text	   data	    bss	    dec	    hex	filename
> 17124425	7038546	5378112	29541083	1c2c2db	vmlinux
> [acme@quaco pahole]$ ls -lah vmlinux
> -rwxrwxr-x. 1 acme acme 621M Mar  7 15:40 vmlinux
> [acme@quaco pahole]$ pahole -F btf --sizes vmlinux | wc -l
> 7472
> [acme@quaco pahole]$ pahole -F dwarf --sizes vmlinux | wc -l
> 7445
> [acme@quaco pahole]$ pahole -F dwarf --show_private_classes --sizes vmlinux | wc -l
> 7472
> [acme@quaco pahole]$
> [acme@quaco pahole]$ pahole -F btf -C perf_event_attr vmlinux
> struct perf_event_attr {
> 	__u32                      type;                 /*     0     4 */
> 	__u32                      size;                 /*     4     4 */
> 	__u64                      config;               /*     8     8 */
> 	union {
> 		__u64              sample_period;        /*    16     8 */
> 		__u64              sample_freq;          /*    16     8 */
> 	};                                               /*    16     8 */
> 	__u64                      sample_type;          /*    24     8 */
> 	__u64                      read_format;          /*    32     8 */
> 	__u64                      disabled:1;           /*    40:63  8 */
> 	__u64                      inherit:1;            /*    40:62  8 */
> 	__u64                      pinned:1;             /*    40:61  8 */
> 	__u64                      exclusive:1;          /*    40:60  8 */
> 	__u64                      exclude_user:1;       /*    40:59  8 */
> 	__u64                      exclude_kernel:1;     /*    40:58  8 */
> 	__u64                      exclude_hv:1;         /*    40:57  8 */
> 	__u64                      exclude_idle:1;       /*    40:56  8 */
> 	__u64                      mmap:1;               /*    40:55  8 */
> 	__u64                      comm:1;               /*    40:54  8 */
> 	__u64                      freq:1;               /*    40:53  8 */
> 	__u64                      inherit_stat:1;       /*    40:52  8 */
> 	__u64                      enable_on_exec:1;     /*    40:51  8 */
> 	__u64                      task:1;               /*    40:50  8 */
> 	__u64                      watermark:1;          /*    40:49  8 */
> 	__u64                      precise_ip:2;         /*    40:47  8 */
> 	__u64                      mmap_data:1;          /*    40:46  8 */
> 	__u64                      sample_id_all:1;      /*    40:45  8 */
> 	__u64                      exclude_host:1;       /*    40:44  8 */
> 	__u64                      exclude_guest:1;      /*    40:43  8 */
> 	__u64                      exclude_callchain_kernel:1; /*    40:42  8 */
> 	__u64                      exclude_callchain_user:1; /*    40:41  8 */
> 	__u64                      mmap2:1;              /*    40:40  8 */
> 	__u64                      comm_exec:1;          /*    40:39  8 */
> 	__u64                      use_clockid:1;        /*    40:38  8 */
> 	__u64                      context_switch:1;     /*    40:37  8 */
> 	__u64                      write_backward:1;     /*    40:36  8 */
> 	__u64                      namespaces:1;         /*    40:35  8 */
> 	__u64                      ksymbol:1;            /*    40:34  8 */
> 	__u64                      bpf_event:1;          /*    40:33  8 */
> 	__u64                      __reserved_1:33;      /*    40: 0  8 */
> 	union {
> 		__u32              wakeup_events;        /*    48     4 */
> 		__u32              wakeup_watermark;     /*    48     4 */
> 	};                                               /*    48     4 */
> 	__u32                      bp_type;              /*    52     4 */
> 	union {
> 		__u64              bp_addr;              /*    56     8 */
> 		__u64              kprobe_func;          /*    56     8 */
> 		__u64              uprobe_path;          /*    56     8 */
> 		__u64              config1;              /*    56     8 */
> 	};                                               /*    56     8 */
> 	/* --- cacheline 1 boundary (64 bytes) --- */
> 	union {
> 		__u64              bp_len;               /*    64     8 */
> 		__u64              kprobe_addr;          /*    64     8 */
> 		__u64              probe_offset;         /*    64     8 */
> 		__u64              config2;              /*    64     8 */
> 	};                                               /*    64     8 */
> 	__u64                      branch_sample_type;   /*    72     8 */
> 	__u64                      sample_regs_user;     /*    80     8 */
> 	__u32                      sample_stack_user;    /*    88     4 */
> 	__s32                      clockid;              /*    92     4 */
> 	__u64                      sample_regs_intr;     /*    96     8 */
> 	__u32                      aux_watermark;        /*   104     4 */
> 	__u16                      sample_max_stack;     /*   108     2 */
> 	__u16                      __reserved_2;         /*   110     2 */
> 
> 	/* size: 112, cachelines: 2, members: 49 */
> 	/* last cacheline: 48 bytes */
> };
> [acme@quaco pahole]$

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-07 14:09   ` Arnaldo Carvalho de Melo
  2019-03-07 14:11     ` Arnaldo Carvalho de Melo
@ 2019-03-08 19:39     ` Andrii Nakryiko
  1 sibling, 0 replies; 11+ messages in thread
From: Andrii Nakryiko @ 2019-03-08 19:39 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, dwarves, bpf

On Thu, Mar 7, 2019 at 6:09 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Thu, Mar 07, 2019 at 11:02:47AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Wed, Mar 06, 2019 at 04:23:21PM -0800, Andrii Nakryiko escreveu:
> > > Due to this assumption, libdwarves and other parts of pahole are using 16-bit
> > > counters to iterate over entities within CU. This can cause infinite loop when
> > > iterating BTF data, if there are more than 65535 types. This patch changes
> > > non-public variables to use 32-bit integers, where appropriate.
>
> > Ok, there are some bits that are unrelated, I'm comment on it and remove
> > before applying.

Yeah, thanks for cleaning this up! My intent was to convert all places
where we rely on uint16_t for things that potentially can overflow (so
not just IDs) and are safe to change without binary compatibility
issues. I didn't intend this to cause you more work, sorry about that.

>
> Ooops, see below, I'm removing the non-type related parts, and will look
> at changing the types till btfdiff with that allyesconfig aarch64
> vmlinux image shows no diffs.
>
> - Arnaldo

<snip>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-08 18:45     ` Arnaldo Carvalho de Melo
@ 2019-03-08 19:42       ` Andrii Nakryiko
  2019-03-11  4:31         ` Andrii Nakryiko
  0 siblings, 1 reply; 11+ messages in thread
From: Andrii Nakryiko @ 2019-03-08 19:42 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, Song Liu,
	Martin Lau, Jiri Olsa, Namhyung Kim, dwarves, bpf

On Fri, Mar 8, 2019 at 10:45 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Thu, Mar 07, 2019 at 09:26:37PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Several looks similar and the low hanging fruit to investigate, seems to
> > be enum bitfields, and the others may as well end up being the same, in
> > miscalculated stats for structs embedded in other structs:
>
> I had little time to further look into this, but from what I've seen the
> good news is that it seems the problem is with the DWARF loader, the BTF
> one producing the right results 8-) I'll take a look at a fix you made
> for packed enums and probably use the same thing on the DWARF loader.

Yeah, I suspected it might be related to base_type__name_to_size()
logic I removed for btf_loader. Ideally we should take the size from
DWARF data itself (assuming it's there) and remove
base_type__name_to_size() and related parts.

>
> - Arnaldo
>
> > $ btfdiff examples/vmlinux-aarch64
> > --- /tmp/btfdiff.dwarf.81KCPb 2019-03-07 18:20:13.153319625 -0300
> > +++ /tmp/btfdiff.btf.g1QkcZ   2019-03-07 18:20:13.928328675 -0300

<snip>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-08 19:42       ` Andrii Nakryiko
@ 2019-03-11  4:31         ` Andrii Nakryiko
  2019-03-11 14:53           ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 11+ messages in thread
From: Andrii Nakryiko @ 2019-03-11  4:31 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, Song Liu,
	Martin Lau, Jiri Olsa, Namhyung Kim, dwarves, bpf

On Fri, Mar 8, 2019 at 11:42 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Mar 8, 2019 at 10:45 AM Arnaldo Carvalho de Melo
> <arnaldo.melo@gmail.com> wrote:
> >
> > Em Thu, Mar 07, 2019 at 09:26:37PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Several looks similar and the low hanging fruit to investigate, seems to
> > > be enum bitfields, and the others may as well end up being the same, in
> > > miscalculated stats for structs embedded in other structs:
> >
> > I had little time to further look into this, but from what I've seen the
> > good news is that it seems the problem is with the DWARF loader, the BTF
> > one producing the right results 8-) I'll take a look at a fix you made
> > for packed enums and probably use the same thing on the DWARF loader.
>
> Yeah, I suspected it might be related to base_type__name_to_size()
> logic I removed for btf_loader. Ideally we should take the size from
> DWARF data itself (assuming it's there) and remove
> base_type__name_to_size() and related parts.

So I got a chance today to verify your changes from
tmp.type_id_t-as-uint32_t branch. They look good to me. I've tested
old and new version of pahole on few of my kernel images, both typical
one and allyesconfig one. They both produced identical binaries after
BTF encoding and deduplication, which seems to be good indication that
nothing is broken. I hope you can push those changes into master soon.

I've also took a brief look at btfdiff differences for allyesconfig.
There are not that many of them: just 16k of output text, which given
200k types is not a lot.

I've noticed that there are problems for packed enum fields, which are
not handled properly neither in DWARF, nor in BTF.

Here's one example:

 struct myrb_dcdb {
        unsigned int               target:4;             /*     0:28  4 */
        unsigned int               channel:4;            /*     0:24  4 */

-       /* Bitfield combined with next fields */
+       /* XXX 24 bits hole, try to pack */

        enum {
                MYRB_DCDB_XFER_NONE = 0,
                MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
                MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
                MYRB_DCDB_XFER_ILLEGAL = 3,
-       } data_xfer:2;                                     /*     1     4 */
+       } data_xfer:2;                                     /*     4     1 */

        /* Bitfield combined with previous fields */

        unsigned int               early_status:1;       /*     0:21  4 */
        unsigned int               rsvd1:1;              /*     0:20  4 */

-       /* XXX 4 bits hole, try to pack */
-       /* Bitfield combined with next fields */
+       /* XXX 28 bits hole, try to pack */

        enum {
                MYRB_DCDB_TMO_24_HRS = 0,
                MYRB_DCDB_TMO_10_SECS = 1,
                MYRB_DCDB_TMO_60_SECS = 2,
                MYRB_DCDB_TMO_10_MINS = 3,
-       } timeout:2;                                       /*     1     4 */
+       } timeout:2;                                       /*     4     1 */

        /* Bitfield combined with previous fields */

@@ -324624,10 +324641,10 @@ struct myrb_dcdb {
        unsigned char              rsvd2;                /*    87     1 */

        /* size: 88, cachelines: 2, members: 17 */
-       /* bit holes: 2, sum bit holes: 16 bits */
+       /* bit holes: 3, sum bit holes: 64 bits */
        /* last cacheline: 24 bytes */

-       /* BRAIN FART ALERT! 88 != 83 + 0(holes), diff = 5 */
+       /* BRAIN FART ALERT! 88 != 86 + 0(holes), diff = 2 */
 };


Both DWARF and BTF output emits BRAIN FART ALERT and both can't handle
2-bit enums.

Here's source code definition of that struct:

struct myrb_dcdb {
        unsigned target:4;                               /* Byte 0 Bits 0-3 */
        unsigned channel:4;                              /* Byte 0 Bits 4-7 */
        enum {
                MYRB_DCDB_XFER_NONE =           0,
                MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
                MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
                MYRB_DCDB_XFER_ILLEGAL =        3
        } __packed data_xfer:2;                         /* Byte 1 Bits 0-1 */
        unsigned early_status:1;                        /* Byte 1 Bit 2 */
        unsigned rsvd1:1;                               /* Byte 1 Bit 3 */
        enum {
                MYRB_DCDB_TMO_24_HRS =  0,
                MYRB_DCDB_TMO_10_SECS = 1,
                MYRB_DCDB_TMO_60_SECS = 2,
                MYRB_DCDB_TMO_10_MINS = 3
        } __packed timeout:2;                           /* Byte 1 Bits 4-5 */
        unsigned no_autosense:1;                        /* Byte 1 Bit 6 */
        unsigned allow_disconnect:1;                    /* Byte 1 Bit 7 */
        unsigned short xfer_len_lo;                     /* Bytes 2-3 */
        u32 dma_addr;                                   /* Bytes 4-7 */
        unsigned char cdb_len:4;                        /* Byte 8 Bits 0-3 */
        unsigned char xfer_len_hi4:4;                   /* Byte 8 Bits 4-7 */
        unsigned char sense_len;                        /* Byte 9 */
        unsigned char cdb[12];                          /* Bytes 10-21 */
        unsigned char sense[64];                        /* Bytes 22-85 */
        unsigned char status;                           /* Byte 86 */
        unsigned char rsvd2;                            /* Byte 87 */
};

I've checked raw BTF data for that struct:

[12835109] <STRUCT> 'myrb_dcdb' (17 members)
    #00 target (off=0, sz=4) --> [12832925]
    #01 channel (off=4, sz=4) --> [12832925]
    #02 data_xfer (off=32, sz=2) --> [12835107]
    #03 early_status (off=10, sz=1) --> [12832925]
    #04 rsvd1 (off=11, sz=1) --> [12832925]
    #05 timeout (off=36, sz=2) --> [12835108]
    #06 no_autosense (off=14, sz=1) --> [12832925]
    #07 allow_disconnect (off=15, sz=1) --> [12832925]
    #08 xfer_len_lo (off=16, sz=0) --> [12832933]
    #09 dma_addr (off=32, sz=0) --> [12832946]
    #10 cdb_len (off=64, sz=4) --> [12832929]
    #11 xfer_len_hi4 (off=68, sz=4) --> [12832929]
    #12 sense_len (off=72, sz=0) --> [12832929]
    #13 cdb (off=80, sz=0) --> [12835084]
    #14 sense (off=176, sz=0) --> [12835110]
    #15 status (off=688, sz=0) --> [12832929]
    #16 rsvd2 (off=696, sz=0) --> [12832929]

off is a bit field offset, sz is bitfield size (0 if not bitfield).

Notice how data_xfer has correct sz=2, but off=32 is totally wrong and
should be 8. Similar issue with timeout with sz=2 and off=36 (should
be 12). So there seem to be some problem when doing DWARF to BTF
conversion. I haven't had chance to dig deeper, I'll try to create a
small repro and dig deeper when I get time (it's really hard to work
with allyesconfig anything due to humongous sizes of data/log/output).

In any case, what was your plan w.r.t. new release of pahole? Do you
want to iron out these obscure bitfield problems first and add
--progress before new version?


>
> >
> > - Arnaldo
> >
> > > $ btfdiff examples/vmlinux-aarch64
> > > --- /tmp/btfdiff.dwarf.81KCPb 2019-03-07 18:20:13.153319625 -0300
> > > +++ /tmp/btfdiff.btf.g1QkcZ   2019-03-07 18:20:13.928328675 -0300
>
> <snip>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-11  4:31         ` Andrii Nakryiko
@ 2019-03-11 14:53           ` Arnaldo Carvalho de Melo
  2019-03-11 16:39             ` Andrii Nakryiko
  0 siblings, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-11 14:53 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Arnaldo Carvalho de Melo, Andrii Nakryiko, Alexei Starovoitov,
	Yonghong Song, Song Liu, Martin Lau, Jiri Olsa, Namhyung Kim,
	dwarves, bpf

Em Sun, Mar 10, 2019 at 09:31:51PM -0700, Andrii Nakryiko escreveu:
> On Fri, Mar 8, 2019 at 11:42 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Mar 8, 2019 at 10:45 AM Arnaldo Carvalho de Melo
> > <arnaldo.melo@gmail.com> wrote:
> > >
> > > Em Thu, Mar 07, 2019 at 09:26:37PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > Several looks similar and the low hanging fruit to investigate, seems to
> > > > be enum bitfields, and the others may as well end up being the same, in
> > > > miscalculated stats for structs embedded in other structs:
> > >
> > > I had little time to further look into this, but from what I've seen the
> > > good news is that it seems the problem is with the DWARF loader, the BTF
> > > one producing the right results 8-) I'll take a look at a fix you made
> > > for packed enums and probably use the same thing on the DWARF loader.
> >
> > Yeah, I suspected it might be related to base_type__name_to_size()
> > logic I removed for btf_loader. Ideally we should take the size from
> > DWARF data itself (assuming it's there) and remove
> > base_type__name_to_size() and related parts.
> 
> So I got a chance today to verify your changes from
> tmp.type_id_t-as-uint32_t branch. They look good to me. I've tested
> old and new version of pahole on few of my kernel images, both typical
> one and allyesconfig one. They both produced identical binaries after
> BTF encoding and deduplication, which seems to be good indication that
> nothing is broken. I hope you can push those changes into master soon.
> 
> I've also took a brief look at btfdiff differences for allyesconfig.
> There are not that many of them: just 16k of output text, which given
> 200k types is not a lot.
> 
> I've noticed that there are problems for packed enum fields, which are
> not handled properly neither in DWARF, nor in BTF.
> 
> Here's one example:
> 
>  struct myrb_dcdb {
>         unsigned int               target:4;             /*     0:28  4 */
>         unsigned int               channel:4;            /*     0:24  4 */
> 
> -       /* Bitfield combined with next fields */
> +       /* XXX 24 bits hole, try to pack */
> 
>         enum {
>                 MYRB_DCDB_XFER_NONE = 0,
>                 MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
>                 MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
>                 MYRB_DCDB_XFER_ILLEGAL = 3,
> -       } data_xfer:2;                                     /*     1     4 */
> +       } data_xfer:2;                                     /*     4     1 */
> 
>         /* Bitfield combined with previous fields */
> 
>         unsigned int               early_status:1;       /*     0:21  4 */
>         unsigned int               rsvd1:1;              /*     0:20  4 */
> 
> -       /* XXX 4 bits hole, try to pack */
> -       /* Bitfield combined with next fields */
> +       /* XXX 28 bits hole, try to pack */
> 
>         enum {
>                 MYRB_DCDB_TMO_24_HRS = 0,
>                 MYRB_DCDB_TMO_10_SECS = 1,
>                 MYRB_DCDB_TMO_60_SECS = 2,
>                 MYRB_DCDB_TMO_10_MINS = 3,
> -       } timeout:2;                                       /*     1     4 */
> +       } timeout:2;                                       /*     4     1 */
> 
>         /* Bitfield combined with previous fields */
> 
> @@ -324624,10 +324641,10 @@ struct myrb_dcdb {
>         unsigned char              rsvd2;                /*    87     1 */
> 
>         /* size: 88, cachelines: 2, members: 17 */
> -       /* bit holes: 2, sum bit holes: 16 bits */
> +       /* bit holes: 3, sum bit holes: 64 bits */
>         /* last cacheline: 24 bytes */
> 
> -       /* BRAIN FART ALERT! 88 != 83 + 0(holes), diff = 5 */
> +       /* BRAIN FART ALERT! 88 != 86 + 0(holes), diff = 2 */
>  };
> 
> 
> Both DWARF and BTF output emits BRAIN FART ALERT and both can't handle
> 2-bit enums.
> 
> Here's source code definition of that struct:
> 
> struct myrb_dcdb {
>         unsigned target:4;                               /* Byte 0 Bits 0-3 */
>         unsigned channel:4;                              /* Byte 0 Bits 4-7 */
>         enum {
>                 MYRB_DCDB_XFER_NONE =           0,
>                 MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
>                 MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
>                 MYRB_DCDB_XFER_ILLEGAL =        3
>         } __packed data_xfer:2;                         /* Byte 1 Bits 0-1 */
>         unsigned early_status:1;                        /* Byte 1 Bit 2 */
>         unsigned rsvd1:1;                               /* Byte 1 Bit 3 */
>         enum {
>                 MYRB_DCDB_TMO_24_HRS =  0,
>                 MYRB_DCDB_TMO_10_SECS = 1,
>                 MYRB_DCDB_TMO_60_SECS = 2,
>                 MYRB_DCDB_TMO_10_MINS = 3
>         } __packed timeout:2;                           /* Byte 1 Bits 4-5 */
>         unsigned no_autosense:1;                        /* Byte 1 Bit 6 */
>         unsigned allow_disconnect:1;                    /* Byte 1 Bit 7 */
>         unsigned short xfer_len_lo;                     /* Bytes 2-3 */
>         u32 dma_addr;                                   /* Bytes 4-7 */
>         unsigned char cdb_len:4;                        /* Byte 8 Bits 0-3 */
>         unsigned char xfer_len_hi4:4;                   /* Byte 8 Bits 4-7 */
>         unsigned char sense_len;                        /* Byte 9 */
>         unsigned char cdb[12];                          /* Bytes 10-21 */
>         unsigned char sense[64];                        /* Bytes 22-85 */
>         unsigned char status;                           /* Byte 86 */
>         unsigned char rsvd2;                            /* Byte 87 */
> };
> 
> I've checked raw BTF data for that struct:
> 
> [12835109] <STRUCT> 'myrb_dcdb' (17 members)
>     #00 target (off=0, sz=4) --> [12832925]
>     #01 channel (off=4, sz=4) --> [12832925]
>     #02 data_xfer (off=32, sz=2) --> [12835107]
>     #03 early_status (off=10, sz=1) --> [12832925]
>     #04 rsvd1 (off=11, sz=1) --> [12832925]
>     #05 timeout (off=36, sz=2) --> [12835108]
>     #06 no_autosense (off=14, sz=1) --> [12832925]
>     #07 allow_disconnect (off=15, sz=1) --> [12832925]
>     #08 xfer_len_lo (off=16, sz=0) --> [12832933]
>     #09 dma_addr (off=32, sz=0) --> [12832946]
>     #10 cdb_len (off=64, sz=4) --> [12832929]
>     #11 xfer_len_hi4 (off=68, sz=4) --> [12832929]
>     #12 sense_len (off=72, sz=0) --> [12832929]
>     #13 cdb (off=80, sz=0) --> [12835084]
>     #14 sense (off=176, sz=0) --> [12835110]
>     #15 status (off=688, sz=0) --> [12832929]
>     #16 rsvd2 (off=696, sz=0) --> [12832929]
> 
> off is a bit field offset, sz is bitfield size (0 if not bitfield).
> 
> Notice how data_xfer has correct sz=2, but off=32 is totally wrong and
> should be 8. Similar issue with timeout with sz=2 and off=36 (should
> be 12). So there seem to be some problem when doing DWARF to BTF
> conversion. I haven't had chance to dig deeper, I'll try to create a
> small repro and dig deeper when I get time (it's really hard to work
> with allyesconfig anything due to humongous sizes of data/log/output).

Right, what I'm doing now is to pick the structs that and having things
like:

[acme@quaco pahole]$ size examples/DAC960.o
   text	   data	    bss	    dec	    hex	filename
      0	      0	      0	      0	      0	examples/DAC960.o
[acme@quaco pahole]$ ls -la examples/DAC960.o
-rwxrwxr-x. 1 acme acme 10736 Mar  8 11:07 examples/DAC960.o
[acme@quaco pahole]$ ls -la examples/DAC960.c 
-rw-rw-r--. 1 acme acme 4480 Mar  8 11:04 examples/DAC960.c
[acme@quaco pahole]$ pahole --sizes examples/DAC960.o
myrb_enquiry2	128	0
[acme@quaco pahole]
[acme@quaco pahole]$ btfdiff examples/DAC960.o
--- /tmp/btfdiff.dwarf.VMTvcw	2019-03-11 11:51:07.858695537 -0300
+++ /tmp/btfdiff.btf.f75DjU	2019-03-11 11:51:07.860695576 -0300
@@ -52,18 +52,18 @@ struct myrb_enquiry2 {
 			MYRB_RAM_TYPE_EDO = 1,
 			MYRB_RAM_TYPE_SDRAM = 2,
 			MYRB_RAM_TYPE_Last = 7,
-		} ram:3;                                   /*    40     4 */
+		} ram:3;                                   /*    43     1 */
 		enum {
 			MYRB_ERR_CORR_None = 0,
 			MYRB_ERR_CORR_Parity = 1,
 			MYRB_ERR_CORR_ECC = 2,
 			MYRB_ERR_CORR_Last = 7,
-		} ec:3;                                    /*    40     4 */
-		unsigned char      fast_page:1;          /*    40: 1  1 */
-		unsigned char      low_power:1;          /*    40: 0  1 */
+		} ec:3;                                    /*    43     1 */
 
-		/* Bitfield combined with next fields */
+		/* Bitfield combined with previous fields */
 
+		unsigned char      fast_page:1;          /*    40: 1  1 */
+		unsigned char      low_power:1;          /*    40: 0  1 */
 		unsigned char      rsvd4;                /*    41     1 */
 	} mem_type;                                      /*    40     2 */
 	short unsigned int         clock_speed;          /*    42     2 */
@@ -94,18 +94,18 @@ struct myrb_enquiry2 {
 			MYRB_WIDTH_NARROW_8BIT = 0,
 			MYRB_WIDTH_WIDE_16BIT = 1,
 			MYRB_WIDTH_WIDE_32BIT = 2,
-		} bus_width:2;                             /*   106     4 */
+		} bus_width:2;                             /*   109     1 */
 		enum {
 			MYRB_SCSI_SPEED_FAST = 0,
 			MYRB_SCSI_SPEED_ULTRA = 1,
 			MYRB_SCSI_SPEED_ULTRA2 = 2,
-		} bus_speed:2;                             /*   106     4 */
+		} bus_speed:2;                             /*   109     1 */
+
+		/* Bitfield combined with previous fields */
+
 		unsigned char      differential:1;       /*   106: 3  1 */
 		unsigned char      rsvd10:3;             /*   106: 0  1 */
 	} scsi_cap;                                      /*   106     1 */
-
-	/* XXX last struct has 65533 bytes of padding */
-
 	unsigned char              rsvd11[5];            /*   107     5 */
 	short unsigned int         fw_build;             /*   112     2 */
 	enum {
@@ -127,5 +127,4 @@ struct myrb_enquiry2 {
 	unsigned char              rsvd14[8];            /*   120     8 */
 
 	/* size: 128, cachelines: 2, members: 46 */
-	/* paddings: 1, sum paddings: 65533 */
 };
[acme@quaco pahole]$ 
 
> In any case, what was your plan w.r.t. new release of pahole? Do you
> want to iron out these obscure bitfield problems first and add
> --progress before new version?

--progress can wait, what I would like would be to have btfdiff clean
for that allyesconfig kernel, fixing these last odd cases. Getting the
exact same output (modulo flat arrays and the show_private_classes used
in btfdiff) would inspire more confidence in the whole thing, I think.

I've added your Tested-by to the csets changing uint16_t to uint32_t and
pushing to master now. Will try to spend some time fixing these last
issues.

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH pahole] pahole: use 32-bit integers for iterations within CU
  2019-03-11 14:53           ` Arnaldo Carvalho de Melo
@ 2019-03-11 16:39             ` Andrii Nakryiko
  0 siblings, 0 replies; 11+ messages in thread
From: Andrii Nakryiko @ 2019-03-11 16:39 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Andrii Nakryiko, Alexei Starovoitov, Yonghong Song, Song Liu,
	Martin Lau, Jiri Olsa, Namhyung Kim, dwarves, bpf

On Mon, Mar 11, 2019 at 7:53 AM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Sun, Mar 10, 2019 at 09:31:51PM -0700, Andrii Nakryiko escreveu:
> > On Fri, Mar 8, 2019 at 11:42 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Fri, Mar 8, 2019 at 10:45 AM Arnaldo Carvalho de Melo
> > > <arnaldo.melo@gmail.com> wrote:
> > > >
> > > > Em Thu, Mar 07, 2019 at 09:26:37PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > > Several looks similar and the low hanging fruit to investigate, seems to
> > > > > be enum bitfields, and the others may as well end up being the same, in
> > > > > miscalculated stats for structs embedded in other structs:
> > > >
> > > > I had little time to further look into this, but from what I've seen the
> > > > good news is that it seems the problem is with the DWARF loader, the BTF
> > > > one producing the right results 8-) I'll take a look at a fix you made
> > > > for packed enums and probably use the same thing on the DWARF loader.
> > >
> > > Yeah, I suspected it might be related to base_type__name_to_size()
> > > logic I removed for btf_loader. Ideally we should take the size from
> > > DWARF data itself (assuming it's there) and remove
> > > base_type__name_to_size() and related parts.
> >
> > So I got a chance today to verify your changes from
> > tmp.type_id_t-as-uint32_t branch. They look good to me. I've tested
> > old and new version of pahole on few of my kernel images, both typical
> > one and allyesconfig one. They both produced identical binaries after
> > BTF encoding and deduplication, which seems to be good indication that
> > nothing is broken. I hope you can push those changes into master soon.
> >
> > I've also took a brief look at btfdiff differences for allyesconfig.
> > There are not that many of them: just 16k of output text, which given
> > 200k types is not a lot.
> >
> > I've noticed that there are problems for packed enum fields, which are
> > not handled properly neither in DWARF, nor in BTF.
> >
> > Here's one example:
> >
> >  struct myrb_dcdb {
> >         unsigned int               target:4;             /*     0:28  4 */
> >         unsigned int               channel:4;            /*     0:24  4 */
> >
> > -       /* Bitfield combined with next fields */
> > +       /* XXX 24 bits hole, try to pack */
> >
> >         enum {
> >                 MYRB_DCDB_XFER_NONE = 0,
> >                 MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
> >                 MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
> >                 MYRB_DCDB_XFER_ILLEGAL = 3,
> > -       } data_xfer:2;                                     /*     1     4 */
> > +       } data_xfer:2;                                     /*     4     1 */
> >
> >         /* Bitfield combined with previous fields */
> >
> >         unsigned int               early_status:1;       /*     0:21  4 */
> >         unsigned int               rsvd1:1;              /*     0:20  4 */
> >
> > -       /* XXX 4 bits hole, try to pack */
> > -       /* Bitfield combined with next fields */
> > +       /* XXX 28 bits hole, try to pack */
> >
> >         enum {
> >                 MYRB_DCDB_TMO_24_HRS = 0,
> >                 MYRB_DCDB_TMO_10_SECS = 1,
> >                 MYRB_DCDB_TMO_60_SECS = 2,
> >                 MYRB_DCDB_TMO_10_MINS = 3,
> > -       } timeout:2;                                       /*     1     4 */
> > +       } timeout:2;                                       /*     4     1 */
> >
> >         /* Bitfield combined with previous fields */
> >
> > @@ -324624,10 +324641,10 @@ struct myrb_dcdb {
> >         unsigned char              rsvd2;                /*    87     1 */
> >
> >         /* size: 88, cachelines: 2, members: 17 */
> > -       /* bit holes: 2, sum bit holes: 16 bits */
> > +       /* bit holes: 3, sum bit holes: 64 bits */
> >         /* last cacheline: 24 bytes */
> >
> > -       /* BRAIN FART ALERT! 88 != 83 + 0(holes), diff = 5 */
> > +       /* BRAIN FART ALERT! 88 != 86 + 0(holes), diff = 2 */
> >  };
> >
> >
> > Both DWARF and BTF output emits BRAIN FART ALERT and both can't handle
> > 2-bit enums.
> >
> > Here's source code definition of that struct:
> >
> > struct myrb_dcdb {
> >         unsigned target:4;                               /* Byte 0 Bits 0-3 */
> >         unsigned channel:4;                              /* Byte 0 Bits 4-7 */
> >         enum {
> >                 MYRB_DCDB_XFER_NONE =           0,
> >                 MYRB_DCDB_XFER_DEVICE_TO_SYSTEM = 1,
> >                 MYRB_DCDB_XFER_SYSTEM_TO_DEVICE = 2,
> >                 MYRB_DCDB_XFER_ILLEGAL =        3
> >         } __packed data_xfer:2;                         /* Byte 1 Bits 0-1 */
> >         unsigned early_status:1;                        /* Byte 1 Bit 2 */
> >         unsigned rsvd1:1;                               /* Byte 1 Bit 3 */
> >         enum {
> >                 MYRB_DCDB_TMO_24_HRS =  0,
> >                 MYRB_DCDB_TMO_10_SECS = 1,
> >                 MYRB_DCDB_TMO_60_SECS = 2,
> >                 MYRB_DCDB_TMO_10_MINS = 3
> >         } __packed timeout:2;                           /* Byte 1 Bits 4-5 */
> >         unsigned no_autosense:1;                        /* Byte 1 Bit 6 */
> >         unsigned allow_disconnect:1;                    /* Byte 1 Bit 7 */
> >         unsigned short xfer_len_lo;                     /* Bytes 2-3 */
> >         u32 dma_addr;                                   /* Bytes 4-7 */
> >         unsigned char cdb_len:4;                        /* Byte 8 Bits 0-3 */
> >         unsigned char xfer_len_hi4:4;                   /* Byte 8 Bits 4-7 */
> >         unsigned char sense_len;                        /* Byte 9 */
> >         unsigned char cdb[12];                          /* Bytes 10-21 */
> >         unsigned char sense[64];                        /* Bytes 22-85 */
> >         unsigned char status;                           /* Byte 86 */
> >         unsigned char rsvd2;                            /* Byte 87 */
> > };
> >
> > I've checked raw BTF data for that struct:
> >
> > [12835109] <STRUCT> 'myrb_dcdb' (17 members)
> >     #00 target (off=0, sz=4) --> [12832925]
> >     #01 channel (off=4, sz=4) --> [12832925]
> >     #02 data_xfer (off=32, sz=2) --> [12835107]
> >     #03 early_status (off=10, sz=1) --> [12832925]
> >     #04 rsvd1 (off=11, sz=1) --> [12832925]
> >     #05 timeout (off=36, sz=2) --> [12835108]
> >     #06 no_autosense (off=14, sz=1) --> [12832925]
> >     #07 allow_disconnect (off=15, sz=1) --> [12832925]
> >     #08 xfer_len_lo (off=16, sz=0) --> [12832933]
> >     #09 dma_addr (off=32, sz=0) --> [12832946]
> >     #10 cdb_len (off=64, sz=4) --> [12832929]
> >     #11 xfer_len_hi4 (off=68, sz=4) --> [12832929]
> >     #12 sense_len (off=72, sz=0) --> [12832929]
> >     #13 cdb (off=80, sz=0) --> [12835084]
> >     #14 sense (off=176, sz=0) --> [12835110]
> >     #15 status (off=688, sz=0) --> [12832929]
> >     #16 rsvd2 (off=696, sz=0) --> [12832929]
> >
> > off is a bit field offset, sz is bitfield size (0 if not bitfield).
> >
> > Notice how data_xfer has correct sz=2, but off=32 is totally wrong and
> > should be 8. Similar issue with timeout with sz=2 and off=36 (should
> > be 12). So there seem to be some problem when doing DWARF to BTF
> > conversion. I haven't had chance to dig deeper, I'll try to create a
> > small repro and dig deeper when I get time (it's really hard to work
> > with allyesconfig anything due to humongous sizes of data/log/output).
>
> Right, what I'm doing now is to pick the structs that and having things
> like:
>
> [acme@quaco pahole]$ size examples/DAC960.o
>    text    data     bss     dec     hex filename
>       0       0       0       0       0 examples/DAC960.o
> [acme@quaco pahole]$ ls -la examples/DAC960.o
> -rwxrwxr-x. 1 acme acme 10736 Mar  8 11:07 examples/DAC960.o
> [acme@quaco pahole]$ ls -la examples/DAC960.c
> -rw-rw-r--. 1 acme acme 4480 Mar  8 11:04 examples/DAC960.c
> [acme@quaco pahole]$ pahole --sizes examples/DAC960.o
> myrb_enquiry2   128     0
> [acme@quaco pahole]
> [acme@quaco pahole]$ btfdiff examples/DAC960.o
> --- /tmp/btfdiff.dwarf.VMTvcw   2019-03-11 11:51:07.858695537 -0300
> +++ /tmp/btfdiff.btf.f75DjU     2019-03-11 11:51:07.860695576 -0300
> @@ -52,18 +52,18 @@ struct myrb_enquiry2 {
>                         MYRB_RAM_TYPE_EDO = 1,
>                         MYRB_RAM_TYPE_SDRAM = 2,
>                         MYRB_RAM_TYPE_Last = 7,
> -               } ram:3;                                   /*    40     4 */
> +               } ram:3;                                   /*    43     1 */
>                 enum {
>                         MYRB_ERR_CORR_None = 0,
>                         MYRB_ERR_CORR_Parity = 1,
>                         MYRB_ERR_CORR_ECC = 2,
>                         MYRB_ERR_CORR_Last = 7,
> -               } ec:3;                                    /*    40     4 */
> -               unsigned char      fast_page:1;          /*    40: 1  1 */
> -               unsigned char      low_power:1;          /*    40: 0  1 */
> +               } ec:3;                                    /*    43     1 */
>
> -               /* Bitfield combined with next fields */
> +               /* Bitfield combined with previous fields */
>
> +               unsigned char      fast_page:1;          /*    40: 1  1 */
> +               unsigned char      low_power:1;          /*    40: 0  1 */
>                 unsigned char      rsvd4;                /*    41     1 */
>         } mem_type;                                      /*    40     2 */
>         short unsigned int         clock_speed;          /*    42     2 */
> @@ -94,18 +94,18 @@ struct myrb_enquiry2 {
>                         MYRB_WIDTH_NARROW_8BIT = 0,
>                         MYRB_WIDTH_WIDE_16BIT = 1,
>                         MYRB_WIDTH_WIDE_32BIT = 2,
> -               } bus_width:2;                             /*   106     4 */
> +               } bus_width:2;                             /*   109     1 */
>                 enum {
>                         MYRB_SCSI_SPEED_FAST = 0,
>                         MYRB_SCSI_SPEED_ULTRA = 1,
>                         MYRB_SCSI_SPEED_ULTRA2 = 2,
> -               } bus_speed:2;                             /*   106     4 */
> +               } bus_speed:2;                             /*   109     1 */
> +
> +               /* Bitfield combined with previous fields */
> +
>                 unsigned char      differential:1;       /*   106: 3  1 */
>                 unsigned char      rsvd10:3;             /*   106: 0  1 */
>         } scsi_cap;                                      /*   106     1 */
> -
> -       /* XXX last struct has 65533 bytes of padding */
> -
>         unsigned char              rsvd11[5];            /*   107     5 */
>         short unsigned int         fw_build;             /*   112     2 */
>         enum {
> @@ -127,5 +127,4 @@ struct myrb_enquiry2 {
>         unsigned char              rsvd14[8];            /*   120     8 */
>
>         /* size: 128, cachelines: 2, members: 46 */
> -       /* paddings: 1, sum paddings: 65533 */
>  };
> [acme@quaco pahole]$
>
> > In any case, what was your plan w.r.t. new release of pahole? Do you
> > want to iron out these obscure bitfield problems first and add
> > --progress before new version?
>
> --progress can wait, what I would like would be to have btfdiff clean
> for that allyesconfig kernel, fixing these last odd cases. Getting the
> exact same output (modulo flat arrays and the show_private_classes used
> in btfdiff) would inspire more confidence in the whole thing, I think.

Yep, makes sense.

>
> I've added your Tested-by to the csets changing uint16_t to uint32_t and
> pushing to master now. Will try to spend some time fixing these last
> issues.

Sounds good!

>
> - Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-03-11 16:39 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-07  0:23 [PATCH pahole] pahole: use 32-bit integers for iterations within CU Andrii Nakryiko
2019-03-07 14:02 ` Arnaldo Carvalho de Melo
2019-03-07 14:09   ` Arnaldo Carvalho de Melo
2019-03-07 14:11     ` Arnaldo Carvalho de Melo
2019-03-08 19:39     ` Andrii Nakryiko
2019-03-08  0:26   ` Arnaldo Carvalho de Melo
2019-03-08 18:45     ` Arnaldo Carvalho de Melo
2019-03-08 19:42       ` Andrii Nakryiko
2019-03-11  4:31         ` Andrii Nakryiko
2019-03-11 14:53           ` Arnaldo Carvalho de Melo
2019-03-11 16:39             ` Andrii Nakryiko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.