linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Improvements for kallsyms__parse
@ 2020-04-30 19:35 Ian Rogers
  2020-04-30 19:35 ` [PATCH] perf bench: add kallsyms parsing Ian Rogers
  2020-04-30 19:35 ` [PATCH 2/2] lib kallsyms: parse using io api Ian Rogers
  0 siblings, 2 replies; 6+ messages in thread
From: Ian Rogers @ 2020-04-30 19:35 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Thomas Gleixner, linux-kernel
  Cc: Stephane Eranian, Ian Rogers

kallsyms__parse is called 4 times during perf record startup. Add a
benchmark to measure its performance. Transition it to using the api
io.h buffered reading, improving performance by ~8% or saving ~5% from
perf record start up time.

Ian Rogers (2):
  perf bench: add kallsyms parsing
  lib kallsyms: parse using io api

 tools/lib/api/io.h          |  3 ++
 tools/lib/symbol/kallsyms.c | 81 +++++++++++++++++++------------------
 tools/perf/bench/Build      |  1 +
 tools/perf/bench/bench.h    |  1 +
 tools/perf/builtin-bench.c  |  1 +
 5 files changed, 47 insertions(+), 40 deletions(-)

-- 
2.26.2.526.g744177e7f7-goog


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] perf bench: add kallsyms parsing
  2020-04-30 19:35 [PATCH 0/2] Improvements for kallsyms__parse Ian Rogers
@ 2020-04-30 19:35 ` Ian Rogers
  2020-04-30 19:35 ` [PATCH 2/2] lib kallsyms: parse using io api Ian Rogers
  1 sibling, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2020-04-30 19:35 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Thomas Gleixner, linux-kernel
  Cc: Stephane Eranian, Ian Rogers

Add a benchmark for kallsyms parsing. Example output:

  Running 'internals/kallsyms-parse' benchmark:
  Average kallsyms__parse took: 103.971 ms (+- 0.121 ms)

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/bench/Build            |  1 +
 tools/perf/bench/bench.h          |  1 +
 tools/perf/bench/kallsyms-parse.c | 75 +++++++++++++++++++++++++++++++
 tools/perf/builtin-bench.c        |  1 +
 4 files changed, 78 insertions(+)
 create mode 100644 tools/perf/bench/kallsyms-parse.c

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 042827385c87..768e408757a0 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -9,6 +9,7 @@ perf-y += futex-lock-pi.o
 perf-y += epoll-wait.o
 perf-y += epoll-ctl.o
 perf-y += synthesize.o
+perf-y += kallsyms-parse.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4d669c803237..61cae4966cae 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -44,6 +44,7 @@ int bench_futex_lock_pi(int argc, const char **argv);
 int bench_epoll_wait(int argc, const char **argv);
 int bench_epoll_ctl(int argc, const char **argv);
 int bench_synthesize(int argc, const char **argv);
+int bench_kallsyms_parse(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0
diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c
new file mode 100644
index 000000000000..e36e11f410c9
--- /dev/null
+++ b/tools/perf/bench/kallsyms-parse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of /proc/kallsyms parsing.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+#include <symbol/kallsyms.h>
+
+static unsigned int iterations = 100;
+
+static const struct option options[] = {
+	OPT_UINTEGER('i', "iterations", &iterations,
+		"Number of iterations used to compute average"),
+	OPT_END()
+};
+
+static const char *const bench_usage[] = {
+	"perf bench internals kallsyms-parse <options>",
+	NULL
+};
+
+static int bench_process_symbol(void *arg __maybe_unused,
+				const char *name __maybe_unused,
+				char type __maybe_unused,
+				u64 start __maybe_unused)
+{
+	return 0;
+}
+
+static int do_kallsyms_parse(void)
+{
+	struct timeval start, end, diff;
+	u64 runtime_us;
+	unsigned int i;
+	double time_average, time_stddev;
+	int err;
+	struct stats time_stats;
+
+	init_stats(&time_stats);
+
+	for (i = 0; i < iterations; i++) {
+		gettimeofday(&start, NULL);
+		err = kallsyms__parse("/proc/kallsyms", NULL,
+				bench_process_symbol);
+		if (err)
+			return err;
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+	}
+
+	time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+	time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+	printf("  Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n",
+		time_average, time_stddev);
+	return 0;
+}
+
+int bench_kallsyms_parse(int argc, const char **argv)
+{
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	return do_kallsyms_parse();
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 11c79a8d85d6..083273209c88 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -78,6 +78,7 @@ static struct bench epoll_benchmarks[] = {
 
 static struct bench internals_benchmarks[] = {
 	{ "synthesize", "Benchmark perf event synthesis",	bench_synthesize	},
+	{ "kallsyms-parse", "Benchmark kallsyms parsing",	bench_kallsyms_parse	},
 	{ NULL,		NULL,					NULL			}
 };
 
-- 
2.26.2.526.g744177e7f7-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] lib kallsyms: parse using io api
  2020-04-30 19:35 [PATCH 0/2] Improvements for kallsyms__parse Ian Rogers
  2020-04-30 19:35 ` [PATCH] perf bench: add kallsyms parsing Ian Rogers
@ 2020-04-30 19:35 ` Ian Rogers
  2020-05-01 12:23   ` Jiri Olsa
  1 sibling, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2020-04-30 19:35 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Thomas Gleixner, linux-kernel
  Cc: Stephane Eranian, Ian Rogers

Perf record will call kallsyms__parse 4 times during startup and process
megabytes of data. This changes kallsyms__parse to use the io library
rather than fgets to improve performance of the user code by over 8%.

Before:
  Running 'internals/kallsyms-parse' benchmark:
  Average kallsyms__parse took: 103.988 ms (+- 0.203 ms)
After:
  Running 'internals/kallsyms-parse' benchmark:
  Average kallsyms__parse took: 95.571 ms (+- 0.006 ms)

For a workload like:
$ perf record /bin/true
Run under 'perf record -e cycles:u -g' the time goes from:
Before
30.10%     1.67%  perf     perf                [.] kallsyms__parse
After
25.55%    20.04%  perf     perf                [.] kallsyms__parse
So a little under 5% of the start-up time is removed. A lot of what
remains is on the kernel side, but caching kallsyms within perf would
at least impact memory footprint.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/api/io.h          |  3 ++
 tools/lib/symbol/kallsyms.c | 81 +++++++++++++++++++------------------
 2 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index b7e55b5f8a4a..777c20f6b604 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -7,6 +7,9 @@
 #ifndef __API_IO__
 #define __API_IO__
 
+#include <stdlib.h>
+#include <unistd.h>
+
 struct io {
 	/* File descriptor being read/ */
 	int fd;
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 1a7a9f877095..1ef61b68332a 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "symbol/kallsyms.h"
+#include "api/io.h"
 #include <stdio.h>
-#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 u8 kallsyms2elf_type(char type)
 {
@@ -28,61 +30,60 @@ int hex2u64(const char *ptr, u64 *long_val)
 	return p - ptr;
 }
 
+static void read_to_eol(struct io *io)
+{
+	int ch;
+
+	for (;;) {
+		ch = io__get_char(io);
+		if (ch < 0 || ch == '\n')
+			return;
+	}
+}
+
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start))
 {
-	char *line = NULL;
-	size_t n;
-	int err = -1;
-	FILE *file = fopen(filename, "r");
+	struct io io;
+	char bf[BUFSIZ];
 
-	if (file == NULL)
-		goto out_failure;
+	io.fd = open(filename, O_RDONLY, 0);
 
-	err = 0;
-
-	while (!feof(file)) {
-		u64 start;
-		int line_len, len;
-		char symbol_type;
-		char *symbol_name;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0 || !line)
-			break;
+	if (io.fd < 0)
+		return -1;
 
-		line[--line_len] = '\0'; /* \n */
+	io__init(&io, io.fd, bf, sizeof(bf));
 
-		len = hex2u64(line, &start);
+	while (!io.eof) {
+		__u64 start;
+		int ch, err;
+		size_t i;
+		char symbol_type;
+		char symbol_name[KSYM_NAME_LEN + 1];
 
-		/* Skip the line if we failed to parse the address. */
-		if (!len)
+		if (io__get_hex(&io, &start) != ' ') {
+			read_to_eol(&io);
 			continue;
-
-		len++;
-		if (len + 2 >= line_len)
+		}
+		symbol_type = io__get_char(&io);
+		if (io__get_char(&io) != ' ') {
+			read_to_eol(&io);
 			continue;
-
-		symbol_type = line[len];
-		len += 2;
-		symbol_name = line + len;
-		len = line_len - len;
-
-		if (len >= KSYM_NAME_LEN) {
-			err = -1;
-			break;
 		}
+		for (i = 0; i < sizeof(symbol_name); i++) {
+			ch = io__get_char(&io);
+			if (ch < 0 || ch == '\n')
+				break;
+			symbol_name[i]  = ch;
+		}
+		symbol_name[i]  = '\0';
 
 		err = process_symbol(arg, symbol_name, symbol_type, start);
 		if (err)
 			break;
 	}
 
-	free(line);
-	fclose(file);
-	return err;
-
-out_failure:
-	return -1;
+	close(io.fd);
+	return 0;
 }
-- 
2.26.2.526.g744177e7f7-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] lib kallsyms: parse using io api
  2020-04-30 19:35 ` [PATCH 2/2] lib kallsyms: parse using io api Ian Rogers
@ 2020-05-01 12:23   ` Jiri Olsa
  2020-05-01 15:28     ` Ian Rogers
  0 siblings, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2020-05-01 12:23 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Thomas Gleixner,
	linux-kernel, Stephane Eranian

On Thu, Apr 30, 2020 at 12:35:57PM -0700, Ian Rogers wrote:
> Perf record will call kallsyms__parse 4 times during startup and process
> megabytes of data. This changes kallsyms__parse to use the io library
> rather than fgets to improve performance of the user code by over 8%.
> 
> Before:
>   Running 'internals/kallsyms-parse' benchmark:
>   Average kallsyms__parse took: 103.988 ms (+- 0.203 ms)
> After:
>   Running 'internals/kallsyms-parse' benchmark:
>   Average kallsyms__parse took: 95.571 ms (+- 0.006 ms)
> 
> For a workload like:
> $ perf record /bin/true
> Run under 'perf record -e cycles:u -g' the time goes from:
> Before
> 30.10%     1.67%  perf     perf                [.] kallsyms__parse
> After
> 25.55%    20.04%  perf     perf                [.] kallsyms__parse
> So a little under 5% of the start-up time is removed. A lot of what
> remains is on the kernel side, but caching kallsyms within perf would
> at least impact memory footprint.

with your change I'm getting following warnings:

$ sudo ./perf record -a
Couldn't record kernel reference relocation symbol
Symbol resolution may be skewed if relocation was used (e.g. kexec).
Check /proc/kallsyms permission or run as root.

> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/lib/api/io.h          |  3 ++
>  tools/lib/symbol/kallsyms.c | 81 +++++++++++++++++++------------------
>  2 files changed, 44 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
> index b7e55b5f8a4a..777c20f6b604 100644
> --- a/tools/lib/api/io.h
> +++ b/tools/lib/api/io.h
> @@ -7,6 +7,9 @@
>  #ifndef __API_IO__
>  #define __API_IO__
>  
> +#include <stdlib.h>
> +#include <unistd.h>

was this missing?

jirka

> +
>  struct io {
>  	/* File descriptor being read/ */
>  	int fd;

SNIP


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] lib kallsyms: parse using io api
  2020-05-01 12:23   ` Jiri Olsa
@ 2020-05-01 15:28     ` Ian Rogers
  2020-05-01 22:15       ` Ian Rogers
  0 siblings, 1 reply; 6+ messages in thread
From: Ian Rogers @ 2020-05-01 15:28 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Thomas Gleixner,
	LKML, Stephane Eranian

On Fri, May 1, 2020 at 5:23 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Thu, Apr 30, 2020 at 12:35:57PM -0700, Ian Rogers wrote:
> > Perf record will call kallsyms__parse 4 times during startup and process
> > megabytes of data. This changes kallsyms__parse to use the io library
> > rather than fgets to improve performance of the user code by over 8%.
> >
> > Before:
> >   Running 'internals/kallsyms-parse' benchmark:
> >   Average kallsyms__parse took: 103.988 ms (+- 0.203 ms)
> > After:
> >   Running 'internals/kallsyms-parse' benchmark:
> >   Average kallsyms__parse took: 95.571 ms (+- 0.006 ms)
> >
> > For a workload like:
> > $ perf record /bin/true
> > Run under 'perf record -e cycles:u -g' the time goes from:
> > Before
> > 30.10%     1.67%  perf     perf                [.] kallsyms__parse
> > After
> > 25.55%    20.04%  perf     perf                [.] kallsyms__parse
> > So a little under 5% of the start-up time is removed. A lot of what
> > remains is on the kernel side, but caching kallsyms within perf would
> > at least impact memory footprint.
>
> with your change I'm getting following warnings:
>
> $ sudo ./perf record -a
> Couldn't record kernel reference relocation symbol
> Symbol resolution may be skewed if relocation was used (e.g. kexec).
> Check /proc/kallsyms permission or run as root.

I'll investigate, sorry in advance for sending this out too early.

> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/lib/api/io.h          |  3 ++
> >  tools/lib/symbol/kallsyms.c | 81 +++++++++++++++++++------------------
> >  2 files changed, 44 insertions(+), 40 deletions(-)
> >
> > diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
> > index b7e55b5f8a4a..777c20f6b604 100644
> > --- a/tools/lib/api/io.h
> > +++ b/tools/lib/api/io.h
> > @@ -7,6 +7,9 @@
> >  #ifndef __API_IO__
> >  #define __API_IO__
> >
> > +#include <stdlib.h>
> > +#include <unistd.h>
>
> was this missing?

Yes, they were getting picked up by a transitive #include in
synthesize-events.c, but given the call to read and use of size_t are
here it makes sense for the #includes to be here.

Thanks,
Ian

> jirka
>
> > +
> >  struct io {
> >       /* File descriptor being read/ */
> >       int fd;
>
> SNIP
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] lib kallsyms: parse using io api
  2020-05-01 15:28     ` Ian Rogers
@ 2020-05-01 22:15       ` Ian Rogers
  0 siblings, 0 replies; 6+ messages in thread
From: Ian Rogers @ 2020-05-01 22:15 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Thomas Gleixner,
	LKML, Stephane Eranian

On Fri, May 1, 2020 at 8:28 AM Ian Rogers <irogers@google.com> wrote:
>
> On Fri, May 1, 2020 at 5:23 AM Jiri Olsa <jolsa@redhat.com> wrote:
> >
> > On Thu, Apr 30, 2020 at 12:35:57PM -0700, Ian Rogers wrote:
> > > Perf record will call kallsyms__parse 4 times during startup and process
> > > megabytes of data. This changes kallsyms__parse to use the io library
> > > rather than fgets to improve performance of the user code by over 8%.
> > >
> > > Before:
> > >   Running 'internals/kallsyms-parse' benchmark:
> > >   Average kallsyms__parse took: 103.988 ms (+- 0.203 ms)
> > > After:
> > >   Running 'internals/kallsyms-parse' benchmark:
> > >   Average kallsyms__parse took: 95.571 ms (+- 0.006 ms)
> > >
> > > For a workload like:
> > > $ perf record /bin/true
> > > Run under 'perf record -e cycles:u -g' the time goes from:
> > > Before
> > > 30.10%     1.67%  perf     perf                [.] kallsyms__parse
> > > After
> > > 25.55%    20.04%  perf     perf                [.] kallsyms__parse
> > > So a little under 5% of the start-up time is removed. A lot of what
> > > remains is on the kernel side, but caching kallsyms within perf would
> > > at least impact memory footprint.
> >
> > with your change I'm getting following warnings:
> >
> > $ sudo ./perf record -a
> > Couldn't record kernel reference relocation symbol
> > Symbol resolution may be skewed if relocation was used (e.g. kexec).
> > Check /proc/kallsyms permission or run as root.
>
> I'll investigate, sorry in advance for sending this out too early.

It was an issue with the result in the case of success, it should be
the last result of the function pointer and not 0. Sorry about not
spotting that. Fixed in:
https://lore.kernel.org/lkml/20200501221315.54715-1-irogers@google.com/T/#md59bc11419ca5adc8ab29fa3460288329e8dca91

Thanks,
Ian

> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > >  tools/lib/api/io.h          |  3 ++
> > >  tools/lib/symbol/kallsyms.c | 81 +++++++++++++++++++------------------
> > >  2 files changed, 44 insertions(+), 40 deletions(-)
> > >
> > > diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
> > > index b7e55b5f8a4a..777c20f6b604 100644
> > > --- a/tools/lib/api/io.h
> > > +++ b/tools/lib/api/io.h
> > > @@ -7,6 +7,9 @@
> > >  #ifndef __API_IO__
> > >  #define __API_IO__
> > >
> > > +#include <stdlib.h>
> > > +#include <unistd.h>
> >
> > was this missing?
>
> Yes, they were getting picked up by a transitive #include in
> synthesize-events.c, but given the call to read and use of size_t are
> here it makes sense for the #includes to be here.
>
> Thanks,
> Ian
>
> > jirka
> >
> > > +
> > >  struct io {
> > >       /* File descriptor being read/ */
> > >       int fd;
> >
> > SNIP
> >

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-05-01 22:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-30 19:35 [PATCH 0/2] Improvements for kallsyms__parse Ian Rogers
2020-04-30 19:35 ` [PATCH] perf bench: add kallsyms parsing Ian Rogers
2020-04-30 19:35 ` [PATCH 2/2] lib kallsyms: parse using io api Ian Rogers
2020-05-01 12:23   ` Jiri Olsa
2020-05-01 15:28     ` Ian Rogers
2020-05-01 22:15       ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).