Dwarves Archive on lore.kernel.org
 help / color / Atom feed
From: Andrii Nakryiko <andriin@fb.com>
To: <dwarves@vger.kernel.org>
Cc: <bpf@vger.kernel.org>, Andrii Nakryiko <andriin@fb.com>
Subject: [PATCH dwarves 10/11] strings: use BTF's string APIs for strings management
Date: Tue, 29 Sep 2020 21:27:41 -0700
Message-ID: <20200930042742.2525310-11-andriin@fb.com> (raw)
In-Reply-To: <20200930042742.2525310-1-andriin@fb.com>

Switch strings container to using struct btf and its
btf__add_str()/btf__find_str() APIs, which do equivalent internal string
deduplication. This turns out to be significantly faster than using the
tsearch functions. To satisfy the CTF encoding use case, a hacky approach to
fetching the string section size is used, as libbpf doesn't provide a direct
API to get the total string section size or to copy out just the string data
section.

BEFORE:
         22,624.28 msec task-clock                #    1.000 CPUs utilized
                85      context-switches          #    0.004 K/sec
                 3      cpu-migrations            #    0.000 K/sec
           622,545      page-faults               #    0.028 M/sec
    68,177,206,387      cycles                    #    3.013 GHz                      (24.99%)
   114,370,031,619      instructions              #    1.68  insn per cycle           (25.01%)
    26,125,001,179      branches                  # 1154.733 M/sec                    (25.01%)
       458,861,243      branch-misses             #    1.76% of all branches          (25.00%)
    24,533,455,967      L1-dcache-loads           # 1084.386 M/sec                    (25.02%)
       973,500,214      L1-dcache-load-misses     #    3.97% of all L1-dcache hits    (25.05%)
       338,773,561      LLC-loads                 #   14.974 M/sec                    (25.02%)
        12,651,196      LLC-load-misses           #    3.73% of all LL-cache hits     (25.00%)

      22.628910615 seconds time elapsed

      21.341063000 seconds user
       1.283763000 seconds sys

AFTER:
         18,362.97 msec task-clock                #    1.000 CPUs utilized
                37      context-switches          #    0.002 K/sec
                 0      cpu-migrations            #    0.000 K/sec
           626,281      page-faults               #    0.034 M/sec
    52,480,619,000      cycles                    #    2.858 GHz                      (25.00%)
   104,736,434,384      instructions              #    2.00  insn per cycle           (25.01%)
    23,878,428,465      branches                  # 1300.358 M/sec                    (25.01%)
       252,669,685      branch-misses             #    1.06% of all branches          (25.03%)
    21,829,390,952      L1-dcache-loads           # 1188.772 M/sec                    (25.04%)
       638,086,339      L1-dcache-load-misses     #    2.92% of all L1-dcache hits    (25.02%)
       212,327,435      LLC-loads                 #   11.563 M/sec                    (25.00%)
        14,578,117      LLC-load-misses           #    6.87% of all LL-cache hits     (25.00%)

      18.364427347 seconds time elapsed

      16.985494000 seconds user
       1.377959000 seconds sys

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 ctf_encoder.c |  2 +-
 libctf.c      | 14 ++++----
 libctf.h      |  4 +--
 strings.c     | 91 +++++++++++++++++++--------------------------------
 strings.h     | 32 +++---------------
 5 files changed, 49 insertions(+), 94 deletions(-)

diff --git a/ctf_encoder.c b/ctf_encoder.c
index 3cb455a33098..b761287d4534 100644
--- a/ctf_encoder.c
+++ b/ctf_encoder.c
@@ -248,7 +248,7 @@ int cu__encode_ctf(struct cu *cu, int verbose)
 	if (cu__cache_symtab(cu) < 0)
 		goto out_delete;
 
-	ctf__set_strings(ctf, &strings->gb);
+	ctf__set_strings(ctf, strings);
 
 	uint32_t id;
 	struct tag *pos;
diff --git a/libctf.c b/libctf.c
index 424f157c2415..7f375e47e9b2 100644
--- a/libctf.c
+++ b/libctf.c
@@ -19,6 +19,7 @@
 #include "ctf.h"
 #include "dutil.h"
 #include "gobuffer.h"
+#include "strings.h"
 
 bool ctf__ignore_symtab_function(const GElf_Sym *sym, const char *sym_name)
 {
@@ -284,7 +285,7 @@ int ctf__load_symtab(struct ctf *ctf)
 	return ctf->symtab == NULL ? -1 : 0;
 }
 
-void ctf__set_strings(struct ctf *ctf, struct gobuffer *strings)
+void ctf__set_strings(struct ctf *ctf, struct strings *strings)
 {
 	ctf->strings = strings;
 }
@@ -567,7 +568,7 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
 	size = (gobuffer__size(&ctf->types) +
 		gobuffer__size(&ctf->objects) +
 		gobuffer__size(&ctf->funcs) +
-		gobuffer__size(ctf->strings));
+		strings__size(ctf->strings));
 
 	ctf->size = sizeof(*hdr) + size;
 	ctf->buf = malloc(ctf->size);
@@ -591,13 +592,13 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
 	hdr->ctf_type_off = offset;
 	offset += gobuffer__size(&ctf->types);
 	hdr->ctf_str_off  = offset;
-	hdr->ctf_str_len  = gobuffer__size(ctf->strings);
+	hdr->ctf_str_len  = strings__size(ctf->strings);
 
 	void *payload = ctf->buf + sizeof(*hdr);
 	gobuffer__copy(&ctf->objects, payload + hdr->ctf_object_off);
 	gobuffer__copy(&ctf->funcs, payload + hdr->ctf_func_off);
 	gobuffer__copy(&ctf->types, payload + hdr->ctf_type_off);
-	gobuffer__copy(ctf->strings, payload + hdr->ctf_str_off);
+	strings__copy(ctf->strings, payload + hdr->ctf_str_off);
 
 	*(char *)(ctf->buf + sizeof(*hdr) + hdr->ctf_str_off) = '\0';
 	if (flags & CTF_FLAGS_COMPR) {
@@ -620,11 +621,10 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
 	}
 #if 0
 	printf("\n\ntypes:\n entries: %d\n size: %u"
-		 "\nstrings:\n entries: %u\n size: %u\ncompressed size: %d\n",
+		 "\nstrings:\n size: %u\ncompressed size: %d\n",
 	       ctf->type_index,
 	       gobuffer__size(&ctf->types),
-	       gobuffer__nr_entries(ctf->strings),
-	       gobuffer__size(ctf->strings), size);
+	       strings__size(ctf->strings), size);
 #endif
 	int fd = open(ctf->filename, O_RDWR);
 	if (fd < 0) {
diff --git a/libctf.h b/libctf.h
index 071616c72de3..749be8955c52 100644
--- a/libctf.h
+++ b/libctf.h
@@ -24,7 +24,7 @@ struct ctf {
 	struct gobuffer	  objects; /* data/variables */
 	struct gobuffer	  types;
 	struct gobuffer	  funcs;
-	struct gobuffer   *strings;
+	struct strings   *strings;
 	char		  *filename;
 	size_t		  size;
 	int		  swapped;
@@ -76,7 +76,7 @@ int ctf__add_function(struct ctf *ctf, uint16_t type, uint16_t nr_parms,
 
 int ctf__add_object(struct ctf *ctf, uint16_t type);
 
-void ctf__set_strings(struct ctf *ctf, struct gobuffer *strings);
+void ctf__set_strings(struct ctf *ctf, struct strings *strings);
 int  ctf__encode(struct ctf *ctf, uint8_t flags);
 
 char *ctf__string(struct ctf *ctf, uint32_t ref);
diff --git a/strings.c b/strings.c
index ddb2b1bd85b5..45f8faaeb15d 100644
--- a/strings.c
+++ b/strings.c
@@ -15,75 +15,41 @@
 #include <zlib.h>
 
 #include "dutil.h"
+#include "lib/bpf/src/libbpf.h"
 
 struct strings *strings__new(void)
 {
 	struct strings *strs = malloc(sizeof(*strs));
 
-	if (strs != NULL) {
-		strs->tree = NULL;
-		gobuffer__init(&strs->gb);
+	if (!strs)
+		return NULL;
+
+	strs->btf = btf__new_empty();
+	if (libbpf_get_error(strs->btf)) {
+		free(strs);
+		return NULL;
 	}
 
 	return strs;
-
-}
-
-static void do_nothing(void *ptr __unused)
-{
 }
 
 void strings__delete(struct strings *strs)
 {
 	if (strs == NULL)
 		return;
-	tdestroy(strs->tree, do_nothing);
-	__gobuffer__delete(&strs->gb);
+	btf__free(strs->btf);
 	free(strs);
 }
 
-static strings_t strings__insert(struct strings *strs, const char *s)
-{
-	return gobuffer__add(&strs->gb, s, strlen(s) + 1);
-}
-
-struct search_key {
-	struct strings *strs;
-	const char *str;
-};
-
-static int strings__compare(const void *a, const void *b)
-{
-	const struct search_key *key = a;
-
-	return strcmp(key->str, key->strs->gb.entries + (unsigned long)b);
-}
-
 strings_t strings__add(struct strings *strs, const char *str)
 {
-	unsigned long *s;
 	strings_t index;
-	struct search_key key = {
-		.strs = strs,
-		.str = str,
-	};
 
 	if (str == NULL)
 		return 0;
 
-	s = tsearch(&key, &strs->tree, strings__compare);
-	if (s != NULL) {
-		if (*(struct search_key **)s == (void *)&key) { /* Not found, replace with the right key */
-			index = strings__insert(strs, str);
-			if (index != 0)
-				*s = (unsigned long)index;
-			else {
-				tdelete(&key, &strs->tree, strings__compare);
-				return 0;
-			}
-		} else /* Found! */
-			index = *s;
-	} else
+	index = btf__add_str(strs->btf, str);
+	if (index < 0)
 		return 0;
 
 	return index;
@@ -91,21 +57,32 @@ strings_t strings__add(struct strings *strs, const char *str)
 
 strings_t strings__find(struct strings *strs, const char *str)
 {
-	strings_t *s;
-	struct search_key key = {
-		.strs = strs,
-		.str = str,
-	};
+	return btf__find_str(strs->btf, str);
+}
 
-	if (str == NULL)
-		return 0;
+/* a horrible and inefficient hack to get string section size out of BTF */
+strings_t strings__size(const struct strings *strs)
+{
+	const struct btf_header *p;
+	uint32_t sz;
+
+	p = btf__get_raw_data(strs->btf, &sz);
+	if (!p)
+		return -1;
 
-	s = tfind(&key, &strs->tree, strings__compare);
-	return s ? *s : 0;
+	return p->str_len;
 }
 
-int strings__cmp(const struct strings *strs, strings_t a, strings_t b)
+/* similarly horrible hack to copy the string section out of BTF */
+int strings__copy(const struct strings *strs, void *dst)
 {
-	return a == b ? 0 : strcmp(strings__ptr(strs, a),
-				   strings__ptr(strs, b));
+	const struct btf_header *p;
+	uint32_t sz;
+
+	p = btf__get_raw_data(strs->btf, &sz);
+	if (!p)
+		return -1;
+
+	memcpy(dst, (void *)p + p->str_off, p->str_len);
+	return 0;
 }
diff --git a/strings.h b/strings.h
index 01f50efd7adb..522fbf21de0d 100644
--- a/strings.h
+++ b/strings.h
@@ -6,13 +6,12 @@
   Copyright (C) 2008 Arnaldo Carvalho de Melo <acme@redhat.com>
 */
 
-#include "gobuffer.h"
+#include "lib/bpf/src/btf.h"
 
 typedef unsigned int strings_t;
 
 struct strings {
-	void		*tree;
-	struct gobuffer	gb;
+	struct btf *btf;
 };
 
 struct strings *strings__new(void);
@@ -21,33 +20,12 @@ void strings__delete(struct strings *strings);
 
 strings_t strings__add(struct strings *strings, const char *str);
 strings_t strings__find(struct strings *strings, const char *str);
-
-int strings__cmp(const struct strings *strings, strings_t a, strings_t b);
+strings_t strings__size(const struct strings *strings);
+int strings__copy(const struct strings *strings, void *dst);
 
 static inline const char *strings__ptr(const struct strings *strings, strings_t s)
 {
-	return gobuffer__ptr(&strings->gb, s);
-}
-
-static inline const char *strings__entries(const struct strings *strings)
-{
-	return gobuffer__entries(&strings->gb);
-}
-
-static inline unsigned int strings__nr_entries(const struct strings *strings)
-{
-	return gobuffer__nr_entries(&strings->gb);
-}
-
-static inline strings_t strings__size(const struct strings *strings)
-{
-	return gobuffer__size(&strings->gb);
-}
-
-static inline const char *strings__compress(struct strings *strings,
-					    unsigned int *size)
-{
-	return gobuffer__compress(&strings->gb, size);
+	return btf__str_by_offset(strings->btf, s);
 }
 
 #endif /* _STRINGS_H_ */
-- 
2.24.1


  parent reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-30  4:27 [PATCH dwarves 00/11] Switch BTF loading and encoding to libbpf APIs Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 01/11] libbpf: update to latest libbpf version Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 02/11] btf_encoder: detect BTF encoding errors and exit Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 03/11] dwarves: expose and maintain active debug info loader operations Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 04/11] btf_loader: use libbpf to load BTF Andrii Nakryiko
2020-10-08 18:06   ` Arnaldo Carvalho de Melo
2020-10-08 19:32     ` Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 05/11] btf_encoder: use libbpf APIs to encode BTF type info Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 06/11] btf_encoder: fix emitting __ARRAY_SIZE_TYPE__ as index range type Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 07/11] btf_encoder: discard CUs after BTF encoding Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 08/11] btf_encoder: revamp how per-CPU variables are encoded Andrii Nakryiko
2020-09-30  4:27 ` [PATCH dwarves 09/11] dwarf_loader: increase the size of lookup hash map Andrii Nakryiko
2020-09-30  4:27 ` Andrii Nakryiko [this message]
2020-09-30  4:27 ` [PATCH dwarves 11/11] btf_encoder: support cross-compiled ELF binaries with different endianness Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200930042742.2525310-11-andriin@fb.com \
    --to=andriin@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=dwarves@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Dwarves Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dwarves/0 dwarves/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dwarves dwarves/ https://lore.kernel.org/dwarves \
		dwarves@vger.kernel.org
	public-inbox-index dwarves

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.dwarves


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git