All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf bench: add benchmark for evlist open/close operations
@ 2021-08-09 20:11 Riccardo Mancini
  2021-08-09 20:23 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 6+ messages in thread
From: Riccardo Mancini @ 2021-08-09 20:11 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ian Rogers, Namhyung Kim, Peter Zijlstra, Ingo Molnar,
	Mark Rutland, Jiri Olsa, linux-kernel, linux-perf-users,
	Riccardo Mancini

This new benchmark finds the total time that is taken to open, mmap,
enable, disable, munmap, close an evlist (time taken for new,
create_maps, config, delete is not counted in).
The evlist can be configured as in perf-record using the
-a,-C,-e,-u,--per-thread,-t,-p options.
The events can be duplicated in the evlist to quickly test performance
with many events using the -n options.
Furthermore, also the number of iterations used to calculate the
statistics is customizable.

Examples:
- Open one dummy event system-wide:
$ sudo ./perf bench internals evlist-open-close
  Number of cpus:       4
  Number of threads:    1
  Number of events:     1 (4 fds)
  Number of iterations: 100
  Average open-close took: 613.870 usec (+- 32.852 usec)

- Open the group '{cs,cycles}' on CPU 0
$ sudo ./perf bench internals evlist-open-close -e '{cs,cycles}' -C 0
  Number of cpus:       1
  Number of threads:    1
  Number of events:     2 (2 fds)
  Number of iterations: 100
  Average open-close took: 8503.220 usec (+- 252.652 usec)

- Open 10 'cycles' events for user 0, calculate average over 100 runs
$ sudo ./perf bench internals evlist-open-close -e cycles -n 10 -u 0 -i 100
  Number of cpus:       4
  Number of threads:    328
  Number of events:     10 (13120 fds)
  Number of iterations: 100
  Average open-close took: 180043.140 usec (+- 2295.889 usec)

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
---
 tools/perf/bench/Build               |   1 +
 tools/perf/bench/bench.h             |   1 +
 tools/perf/bench/evlist-open-close.c | 275 +++++++++++++++++++++++++++
 tools/perf/builtin-bench.c           |   1 +
 4 files changed, 278 insertions(+)
 create mode 100644 tools/perf/bench/evlist-open-close.c

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e43f46931b41b78f..61d45fcb4057c945 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,6 +13,7 @@ perf-y += synthesize.o
 perf-y += kallsyms-parse.o
 perf-y += find-bit-bench.o
 perf-y += inject-buildid.o
+perf-y += evlist-open-close.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index eac36afab2b39fa5..b3480bc33fe84885 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -48,6 +48,7 @@ int bench_epoll_ctl(int argc, const char **argv);
 int bench_synthesize(int argc, const char **argv);
 int bench_kallsyms_parse(int argc, const char **argv);
 int bench_inject_build_id(int argc, const char **argv);
+int bench_evlist_open_close(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
new file mode 100644
index 0000000000000000..40bce06f5ca7bef3
--- /dev/null
+++ b/tools/perf/bench/evlist-open-close.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/stat.h"
+#include "../util/evlist.h"
+#include "../util/evsel.h"
+#include "../util/strbuf.h"
+#include "../util/record.h"
+#include "../util/parse-events.h"
+#include "internal/threadmap.h"
+#include "internal/cpumap.h"
+#include <linux/perf_event.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/string.h>
+#include <subcmd/parse-options.h>
+
+#define MMAP_FLUSH_DEFAULT 1
+
+static int iterations = 100;
+static int nr_events = 1;
+static const char *event_string = "dummy";
+
+static struct record_opts opts = {
+	.sample_time	     = true,
+	.mmap_pages	     = UINT_MAX,
+	.user_freq	     = UINT_MAX,
+	.user_interval	     = ULLONG_MAX,
+	.freq		     = 4000,
+	.target		     = {
+		.uses_mmap   = true,
+		.default_per_cpu = true,
+	},
+	.mmap_flush          = MMAP_FLUSH_DEFAULT,
+	.nr_threads_synthesize = 1,
+	.ctl_fd              = -1,
+	.ctl_fd_ack          = -1,
+};
+
+static const struct option options[] = {
+	OPT_STRING('e', "event", &event_string, "event",
+		     "event selector. use 'perf list' to list available events"),
+	OPT_INTEGER('n', "nr-events", &nr_events,
+		     "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"),
+	OPT_INTEGER('i', "iterations", &iterations,
+		"Number of iterations used to compute average (default=100)"),
+	OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide,
+		"system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu",
+		    "list of cpus where to open events"),
+	OPT_STRING('p', "pid", &opts.target.pid, "pid",
+		    "record events on existing process id"),
+	OPT_STRING('t', "tid", &opts.target.tid, "tid",
+		    "record events on existing thread id"),
+	OPT_STRING('u', "uid", &opts.target.uid_str, "user",
+		   "user to profile"),
+	OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread,
+		    "use per-thread mmaps"),
+	OPT_END()
+};
+
+static const char *const bench_usage[] = {
+	"perf bench internals evlist-open-close <options>",
+	NULL
+};
+
+static int evlist__count_evsel_fds(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	int cnt = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		cnt += evsel->core.threads->nr * evsel->core.cpus->nr;
+	}
+
+	return cnt;
+}
+
+static struct evlist *bench__create_evlist(char *evstr)
+{
+	struct evlist *evlist;
+	struct parse_events_error err;
+	int ret;
+
+	evlist = evlist__new();
+	if (!evlist) {
+		pr_err("Not enough memory to create evlist\n");
+		return NULL;
+	}
+
+	bzero(&err, sizeof(err));
+	ret = parse_events(evlist, evstr, &err);
+	if (ret) {
+		parse_events_print_error(&err, evstr);
+		pr_err("Run 'perf list' for a list of valid events\n");
+		ret = 1;
+		goto out_delete_evlist;
+	}
+
+	ret = evlist__create_maps(evlist, &opts.target);
+	if (ret < 0) {
+		pr_err("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	evlist__config(evlist, &opts, NULL);
+
+	return evlist;
+
+out_delete_evlist:
+	evlist__delete(evlist);
+	return NULL;
+}
+
+static int bench__do_evlist_open_close(struct evlist *evlist)
+{
+	int err = -1;
+	char sbuf[STRERR_BUFSIZE];
+
+	err = evlist__open(evlist);
+	if (err < 0) {
+		pr_err("evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	err = evlist__mmap(evlist, opts.mmap_pages);
+	if (err < 0) {
+		pr_err("evlist__mmap: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	evlist__enable(evlist);
+
+	evlist__disable(evlist);
+	evlist__munmap(evlist);
+	evlist__close(evlist);
+
+	return 0;
+}
+
+static int bench_evlist_open_close__run(char *evstr)
+{
+	struct evlist *evlist;
+	struct timeval start, end, diff;
+	double time_average, time_stddev;
+	int i;
+	int err;
+	u64 runtime_us;
+	struct stats time_stats;
+
+	init_stats(&time_stats);
+
+	// used to print statistics only
+	evlist = bench__create_evlist(evstr);
+
+	printf("  Number of cpus:\t%d\n", evlist->core.cpus->nr);
+	printf("  Number of threads:\t%d\n", evlist->core.threads->nr);
+	printf("  Number of events:\t%d (%d fds)\n",
+		evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
+	printf("  Number of iterations:\t%d\n", iterations);
+
+	evlist__delete(evlist);
+
+	for (i = 0; i < iterations; i++) {
+		pr_debug("Started iteration %d\n", i);
+		evlist = bench__create_evlist(evstr);
+		gettimeofday(&start, NULL);
+		err = bench__do_evlist_open_close(evlist);
+		if (err) {
+			evlist__delete(evlist);
+			return err;
+		}
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+
+		evlist__delete(evlist);
+		pr_debug("Iteration %d took:\t%ldus\n", i, runtime_us);
+	}
+
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("  Average open-close took: %.3f usec (+- %.3f usec)\n",
+		time_average, time_stddev);
+
+	return 0;
+}
+
+static char *bench__repeat_event_string(const char *evstr, int n)
+{
+	int err, i, final_size, str_size;
+	struct strbuf buf;
+	char sbuf[STRERR_BUFSIZE];
+
+	str_size = strlen(evstr);
+	final_size = str_size * n + n;
+
+	err = strbuf_init(&buf, final_size);
+	if (err) {
+		pr_err("strbuf_init: %s\n",
+			 str_error_r(err, sbuf, sizeof(sbuf)));
+		goto out_error;
+	}
+
+	for (i = 0; i < n; i++) {
+		err = strbuf_add(&buf, evstr, str_size);
+		if (err) {
+			pr_err("strbuf_add: %s\n",
+				str_error_r(err, sbuf, sizeof(sbuf)));
+			goto out_error;
+		}
+
+		err = strbuf_addch(&buf, i == n-1 ? '\0' : ',');
+		if (err) {
+			pr_err("strbuf_addch: %s\n",
+				str_error_r(err, sbuf, sizeof(sbuf)));
+			goto out_error;
+		}
+	}
+
+	return strbuf_detach(&buf, NULL);
+
+out_error:
+	strbuf_release(&buf);
+	return NULL;
+}
+
+
+int bench_evlist_open_close(int argc, const char **argv)
+{
+	int err = 0;
+	char *evstr, errbuf[BUFSIZ];
+
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	err = target__validate(&opts.target);
+	if (err) {
+		target__strerror(&opts.target, err, errbuf, BUFSIZ);
+		pr_err("%s\n", errbuf);
+		goto out;
+	}
+
+	err = target__parse_uid(&opts.target);
+	if (err) {
+		target__strerror(&opts.target, err, errbuf, BUFSIZ);
+		pr_err("%s", errbuf);
+		goto out;
+	}
+
+	/* Enable ignoring missing threads when -u/-p option is defined. */
+	opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid;
+
+	evstr = bench__repeat_event_string(event_string, nr_events);
+	if (!evstr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = bench_evlist_open_close__run(evstr);
+
+	free(evstr);
+out:
+	return err;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 62a7b7420a448517..d0895162c2ba6411 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -88,6 +88,7 @@ static struct bench internals_benchmarks[] = {
 	{ "synthesize", "Benchmark perf event synthesis",	bench_synthesize	},
 	{ "kallsyms-parse", "Benchmark kallsyms parsing",	bench_kallsyms_parse	},
 	{ "inject-build-id", "Benchmark build-id injection",	bench_inject_build_id	},
+	{ "evlist-open-close", "Benchmark evlist open and close",	bench_evlist_open_close	},
 	{ NULL,		NULL,					NULL			}
 };
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-08-10 14:07 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-09 20:11 [PATCH] perf bench: add benchmark for evlist open/close operations Riccardo Mancini
2021-08-09 20:23 ` Arnaldo Carvalho de Melo
2021-08-09 20:28   ` Arnaldo Carvalho de Melo
2021-08-10 10:31     ` Riccardo Mancini
2021-08-10 14:03       ` Arnaldo Carvalho de Melo
2021-08-10 14:07         ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.