All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/3] perf synthetic events
@ 2020-04-15  5:40 Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Ian Rogers @ 2020-04-15  5:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

Add a multi-threaded version of the internals synthesize benchmark. It
attempts to compute a time per event synthesized, but as it is reading
/proc there are issues:
 - permissions if not run as root
 - "random" nature of /proc
 
By default the benchmark is disabled but can be enabled with a
flag. It has been useful in gauging the value of multi-threaded
improvements not included here as their value appears minimal.

The patch set includes 2 patches that improve synthesis performance
and updates the benchmark numbers:
https://lore.kernel.org/lkml/20200411064248.247530-1-irogers@google.com/

v4 added a missing test file
v3 improved documentation, return values and added testing to the io framework
   following feedback from namhyung@kernel.org.
v2 addressed single threaded synthesize benchmark issues from jolsa@redhat.com
https://lore.kernel.org/lkml/20200402154357.107873-1-irogers@google.com/

Ian Rogers (3):
  perf bench: add a multi-threaded synthesize benchmark
  tools api: add a lightweight buffered reading api
  perf synthetic events: Remove use of sscanf from /proc reading

 tools/lib/api/io.h                 | 112 +++++++++++
 tools/perf/bench/synthesize.c      | 211 +++++++++++++++++---
 tools/perf/tests/Build             |   1 +
 tools/perf/tests/api-io.c          | 304 +++++++++++++++++++++++++++++
 tools/perf/tests/builtin-test.c    |   4 +
 tools/perf/tests/tests.h           |   1 +
 tools/perf/util/synthetic-events.c | 157 ++++++++++-----
 7 files changed, 713 insertions(+), 77 deletions(-)
 create mode 100644 tools/lib/api/io.h
 create mode 100644 tools/perf/tests/api-io.c

-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark
  2020-04-15  5:40 [PATCH v5 0/3] perf synthetic events Ian Rogers
@ 2020-04-15  5:40 ` Ian Rogers
  2020-04-23 14:17   ` Arnaldo Carvalho de Melo
  2020-05-08 13:05   ` [tip: perf/core] perf bench: Add " tip-bot2 for Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 2/3] tools api: add a lightweight buffered reading api Ian Rogers
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Ian Rogers @ 2020-04-15  5:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

By default this isn't run as it reads /proc and may not have access.
For consistency, modify the single threaded benchmark to compute an
average time per event.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/bench/synthesize.c | 211 ++++++++++++++++++++++++++++++----
 1 file changed, 186 insertions(+), 25 deletions(-)

diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 6291257bc9c9..8d624aea1c5e 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -10,60 +10,105 @@
 #include "bench.h"
 #include "../util/debug.h"
 #include "../util/session.h"
+#include "../util/stat.h"
 #include "../util/synthetic-events.h"
 #include "../util/target.h"
 #include "../util/thread_map.h"
 #include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
 #include <linux/err.h>
 #include <linux/time64.h>
 #include <subcmd/parse-options.h>
 
-static unsigned int iterations = 10000;
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
 
 static const struct option options[] = {
-	OPT_UINTEGER('i', "iterations", &iterations,
-		"Number of iterations used to compute average"),
+	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+	OPT_UINTEGER('m', "min-threads", &min_threads,
+		"Minimum number of threads in multithreaded bench"),
+	OPT_UINTEGER('M', "max-threads", &max_threads,
+		"Maximum number of threads in multithreaded bench"),
+	OPT_UINTEGER('i', "single-iterations", &single_iterations,
+		"Number of iterations used to compute single-threaded average"),
+	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+		"Number of iterations used to compute multi-threaded average"),
 	OPT_END()
 };
 
-static const char *const usage[] = {
+static const char *const bench_usage[] = {
 	"perf bench internals synthesize <options>",
 	NULL
 };
 
+static atomic_t event_count;
 
-static int do_synthesize(struct perf_session *session,
-			struct perf_thread_map *threads,
-			struct target *target, bool data_mmap)
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	atomic_inc(&event_count);
+	return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+				struct perf_thread_map *threads,
+				struct target *target, bool data_mmap)
 {
 	const unsigned int nr_threads_synthesize = 1;
 	struct timeval start, end, diff;
 	u64 runtime_us;
 	unsigned int i;
-	double average;
+	double time_average, time_stddev, event_average, event_stddev;
 	int err;
+	struct stats time_stats, event_stats;
 
-	gettimeofday(&start, NULL);
-	for (i = 0; i < iterations; i++) {
-		err = machine__synthesize_threads(&session->machines.host,
-						target, threads, data_mmap,
+	init_stats(&time_stats);
+	init_stats(&event_stats);
+
+	for (i = 0; i < single_iterations; i++) {
+		atomic_set(&event_count, 0);
+		gettimeofday(&start, NULL);
+		err = __machine__synthesize_threads(&session->machines.host,
+						NULL,
+						target, threads,
+						process_synthesized_event,
+						data_mmap,
 						nr_threads_synthesize);
 		if (err)
 			return err;
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+		update_stats(&event_stats, atomic_read(&event_count));
 	}
 
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &diff);
-	runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
-	average = (double)runtime_us/(double)iterations;
-	printf("Average %ssynthesis took: %f usec\n",
-		data_mmap ? "data " : "", average);
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+		data_mmap ? "data " : "", time_average, time_stddev);
+
+	event_average = avg_stats(&event_stats);
+	event_stddev = stddev_stats(&event_stats);
+	printf("  Average num. events: %.3f (+- %.3f)\n",
+		event_average, event_stddev);
+
+	printf("  Average time per event %.3f usec\n",
+		time_average / event_average);
 	return 0;
 }
 
-int bench_synthesize(int argc, const char **argv)
+static int run_single_threaded(void)
 {
-	struct perf_tool tool;
 	struct perf_session *session;
 	struct target target = {
 		.pid = "self",
@@ -71,8 +116,7 @@ int bench_synthesize(int argc, const char **argv)
 	struct perf_thread_map *threads;
 	int err;
 
-	argc = parse_options(argc, argv, options, usage, 0);
-
+	perf_set_singlethreaded();
 	session = perf_session__new(NULL, false, NULL);
 	if (IS_ERR(session)) {
 		pr_err("Session creation failed.\n");
@@ -84,13 +128,16 @@ int bench_synthesize(int argc, const char **argv)
 		err = -ENOMEM;
 		goto err_out;
 	}
-	perf_tool__fill_defaults(&tool);
 
-	err = do_synthesize(session, threads, &target, false);
+	puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+	err = do_run_single_threaded(session, threads, &target, false);
 	if (err)
 		goto err_out;
 
-	err = do_synthesize(session, threads, &target, true);
+	err = do_run_single_threaded(session, threads, &target, true);
 
 err_out:
 	if (threads)
@@ -99,3 +146,117 @@ int bench_synthesize(int argc, const char **argv)
 	perf_session__delete(session);
 	return err;
 }
+
+static int do_run_multi_threaded(struct target *target,
+				unsigned int nr_threads_synthesize)
+{
+	struct timeval start, end, diff;
+	u64 runtime_us;
+	unsigned int i;
+	double time_average, time_stddev, event_average, event_stddev;
+	int err;
+	struct stats time_stats, event_stats;
+	struct perf_session *session;
+
+	init_stats(&time_stats);
+	init_stats(&event_stats);
+	for (i = 0; i < multi_iterations; i++) {
+		session = perf_session__new(NULL, false, NULL);
+		if (!session)
+			return -ENOMEM;
+
+		atomic_set(&event_count, 0);
+		gettimeofday(&start, NULL);
+		err = __machine__synthesize_threads(&session->machines.host,
+						NULL,
+						target, NULL,
+						process_synthesized_event,
+						false,
+						nr_threads_synthesize);
+		if (err) {
+			perf_session__delete(session);
+			return err;
+		}
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+		update_stats(&event_stats, atomic_read(&event_count));
+		perf_session__delete(session);
+	}
+
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
+		time_average, time_stddev);
+
+	event_average = avg_stats(&event_stats);
+	event_stddev = stddev_stats(&event_stats);
+	printf("    Average num. events: %.3f (+- %.3f)\n",
+		event_average, event_stddev);
+
+	printf("    Average time per event %.3f usec\n",
+		time_average / event_average);
+	return 0;
+}
+
+static int run_multi_threaded(void)
+{
+	struct target target = {
+		.cpu_list = "0"
+	};
+	unsigned int nr_threads_synthesize;
+	int err;
+
+	if (max_threads == UINT_MAX)
+		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+	puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+	for (nr_threads_synthesize = min_threads;
+	     nr_threads_synthesize <= max_threads;
+	     nr_threads_synthesize++) {
+		if (nr_threads_synthesize == 1)
+			perf_set_singlethreaded();
+		else
+			perf_set_multithreaded();
+
+		printf("  Number of synthesis threads: %u\n",
+			nr_threads_synthesize);
+
+		err = do_run_multi_threaded(&target, nr_threads_synthesize);
+		if (err)
+			return err;
+	}
+	perf_set_singlethreaded();
+	return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+	int err = 0;
+
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * If neither single threaded nor multi-threaded is specified, default
+	 * to running just single threaded.
+	 */
+	if (!run_st && !run_mt)
+		run_st = true;
+
+	if (run_st)
+		err = run_single_threaded();
+
+	if (!err && run_mt)
+		err = run_multi_threaded();
+
+	return err;
+}
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v5 2/3] tools api: add a lightweight buffered reading api
  2020-04-15  5:40 [PATCH v5 0/3] perf synthetic events Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
@ 2020-04-15  5:40 ` Ian Rogers
  2020-05-08 13:05   ` [tip: perf/core] tools api: Add " tip-bot2 for Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 3/3] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
  2020-04-16 14:19 ` [PATCH v5 0/3] perf synthetic events Namhyung Kim
  3 siblings, 1 reply; 11+ messages in thread
From: Ian Rogers @ 2020-04-15  5:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

The synthesize benchmark shows the majority of execution time going to
fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
reading library that will be used to replace these calls in a follow-up
CL. Add tests for the library to perf test.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/api/io.h              | 112 ++++++++++++
 tools/perf/tests/Build          |   1 +
 tools/perf/tests/api-io.c       | 304 ++++++++++++++++++++++++++++++++
 tools/perf/tests/builtin-test.c |   4 +
 tools/perf/tests/tests.h        |   1 +
 5 files changed, 422 insertions(+)
 create mode 100644 tools/lib/api/io.h
 create mode 100644 tools/perf/tests/api-io.c

diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000000..b7e55b5f8a4a
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+struct io {
+	/* File descriptor being read. */
+	int fd;
+	/* Size of the read buffer. */
+	unsigned int buf_len;
+	/* Pointer to storage for buffering read. */
+	char *buf;
+	/* End of the storage. */
+	char *end;
+	/* Currently accessed data pointer. */
+	char *data;
+	/* Set to true at end of file or on a read error. */
+	bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+			    char *buf, unsigned int buf_len)
+{
+	io->fd = fd;
+	io->buf_len = buf_len;
+	io->buf = buf;
+	io->end = buf;
+	io->data = buf;
+	io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+	char *ptr = io->data;
+
+	if (io->eof)
+		return -1;
+
+	if (ptr == io->end) {
+		ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+		if (n <= 0) {
+			io->eof = true;
+			return -1;
+		}
+		ptr = &io->buf[0];
+		io->end = &io->buf[n];
+	}
+	io->data = ptr + 1;
+	return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
+ * returns the character after the hexadecimal value which may be -1 for eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+	bool first_read = true;
+
+	*hex = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*hex = (*hex << 4) | (ch - '0');
+		else if (ch >= 'a' && ch <= 'f')
+			*hex = (*hex << 4) | (ch - 'a' + 10);
+		else if (ch >= 'A' && ch <= 'F')
+			*hex = (*hex << 4) | (ch - 'A' + 10);
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+/* Read a positive decimal value with out argument dec. If the first character
+ * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
+ * character after the decimal value which may be -1 for eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+	bool first_read = true;
+
+	*dec = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*dec = (*dec * 10) + ch - '0';
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+#endif /* __API_IO__ */
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index b3d1bf13ca07..c75557aeef0e 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -56,6 +56,7 @@ perf-y += mem2node.o
 perf-y += maps.o
 perf-y += time-utils-test.o
 perf-y += genelf.o
+perf-y += api-io.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 000000000000..2ada86ad6084
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected)                             \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %d != %d\n",			\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected)                           \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %lld != %lld\n",		\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+	ssize_t contents_len = strlen(contents);
+	int fd;
+
+	strcpy(path, TEMPL);
+	fd = mkstemp(path);
+	if (fd < 0) {
+		pr_debug("mkstemp failed");
+		return -1;
+	}
+	if (write(fd, contents, contents_len) < contents_len) {
+		pr_debug("short write");
+		close(fd);
+		unlink(path);
+		return -1;
+	}
+	close(fd);
+	return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+		      size_t buf_size, struct io *io)
+{
+	if (make_test_file(path, contents))
+		return -1;
+
+	io->fd = open(path, O_RDONLY);
+	if (io->fd < 0) {
+		pr_debug("Failed to open '%s'\n", path);
+		unlink(path);
+		return -1;
+	}
+	io->buf = malloc(buf_size);
+	if (io->buf == NULL) {
+		pr_debug("Failed to allocate memory");
+		close(io->fd);
+		unlink(path);
+		return -1;
+	}
+	io__init(io, io->fd, io->buf, buf_size);
+	return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+	free(io->buf);
+	close(io->fd);
+	unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	size_t i;
+
+	if (setup_test(path, test_string, buf_size, &io))
+		return -1;
+
+	for (i = 0; i < strlen(test_string); i++) {
+		ch = io__get_char(&io);
+
+		EXPECT_EQUAL(ch, test_string[i]);
+		EXPECT_EQUAL(io.eof, false);
+	}
+	ch = io__get_char(&io);
+	EXPECT_EQUAL(ch, -1);
+	EXPECT_EQUAL(io.eof, true);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_char(void)
+{
+	int i, ret = 0;
+	size_t j;
+
+	static const char *const test_strings[] = {
+		"12345678abcdef90",
+		"a\nb\nc\nd\n",
+		"\a\b\t\v\f\r",
+	};
+	for (i = 0; i <= 10; i++) {
+		for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+			if (do_test_get_char(test_strings[j], 1 << i))
+				ret = -1;
+		}
+	}
+	return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 hex;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_hex(void)
+{
+	int ret = 0;
+
+	if (do_test_get_hex("12345678abcdef90",
+				0x12345678abcdef90, -1,
+				0, -1,
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("12345678ABCDEF90;a;b",
+				0x12345678abcdef90, ';',
+				0xa, ';',
+				0xb, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+				0xabcdef, 'g',
+				0, -2,
+				0x99, 'i',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 dec;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_dec(void)
+{
+	int ret = 0;
+
+	if (do_test_get_dec("12345678abcdef90",
+				12345678, 'a',
+				0, -2,
+				0, -2,
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("12345678;1;2",
+				12345678, ';',
+				1, ';',
+				2, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+				123456789, 'a',
+				0, -2,
+				99, 'c',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+		int subtest __maybe_unused)
+{
+	int ret = 0;
+
+	if (test_get_char())
+		ret = TEST_FAIL;
+	if (test_get_hex())
+		ret = TEST_FAIL;
+	if (test_get_dec())
+		ret = TEST_FAIL;
+	return ret;
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b6322eb0f423..3471ec52ea11 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -309,6 +309,10 @@ static struct test generic_tests[] = {
 		.desc = "Test jit_write_elf",
 		.func = test__jit_write_elf,
 	},
+	{
+		.desc = "Test api io",
+		.func = test__api_io,
+	},
 	{
 		.desc = "maps__merge_in",
 		.func = test__maps__merge_in,
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 61a1ab032080..d6d4ac34eeb7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -112,6 +112,7 @@ int test__mem2node(struct test *t, int subtest);
 int test__maps__merge_in(struct test *t, int subtest);
 int test__time_utils(struct test *t, int subtest);
 int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v5 3/3] perf synthetic events: Remove use of sscanf from /proc reading
  2020-04-15  5:40 [PATCH v5 0/3] perf synthetic events Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
  2020-04-15  5:40 ` [PATCH v5 2/3] tools api: add a lightweight buffered reading api Ian Rogers
@ 2020-04-15  5:40 ` Ian Rogers
  2020-05-08 13:05   ` [tip: perf/core] " tip-bot2 for Ian Rogers
  2020-04-16 14:19 ` [PATCH v5 0/3] perf synthetic events Namhyung Kim
  3 siblings, 1 reply; 11+ messages in thread
From: Ian Rogers @ 2020-04-15  5:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

The synthesize benchmark, run on a single process and thread, shows
perf_event__synthesize_mmap_events as the hottest function with fgets
and sscanf taking the majority of execution time. fscanf performs
similarly well. Replace the scanf call with manual reading of each field
of the /proc/pid/maps line, and remove some unnecessary buffering.
This change also addresses potential, but unlikely, buffer overruns for
the string values read by scanf.

Performance before is:
$ sudo perf bench internals synthesize -m 16 -M 16 -s -t
\# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 102.810 usec (+- 0.027 usec)
  Average num. events: 17.000 (+- 0.000)
  Average time per event 6.048 usec
  Average data synthesis took: 106.325 usec (+- 0.018 usec)
  Average num. events: 89.000 (+- 0.000)
  Average time per event 1.195 usec
Computing performance of multi threaded perf event synthesis by
synthesizing events on CPU 0:
  Number of synthesis threads: 16
    Average synthesis took: 68103.100 usec (+- 441.234 usec)
    Average num. events: 30703.000 (+- 0.730)
    Average time per event 2.218 usec

And after is:
$ sudo perf bench internals synthesize -m 16 -M 16 -s -t
\# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 50.388 usec (+- 0.031 usec)
  Average num. events: 17.000 (+- 0.000)
  Average time per event 2.964 usec
  Average data synthesis took: 52.693 usec (+- 0.020 usec)
  Average num. events: 89.000 (+- 0.000)
  Average time per event 0.592 usec
Computing performance of multi threaded perf event synthesis by
synthesizing events on CPU 0:
  Number of synthesis threads: 16
    Average synthesis took: 45022.400 usec (+- 552.740 usec)
    Average num. events: 30624.200 (+- 10.037)
    Average time per event 1.470 usec

On an Intel Xeon 6154 compiling with Debian gcc 9.2.1.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/synthetic-events.c | 157 +++++++++++++++++++----------
 1 file changed, 105 insertions(+), 52 deletions(-)

diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 9d4aa951eaa6..1ea9adaef9c7 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include <api/fs/fs.h>
+#include <api/io.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -273,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+				u32 *prot, u32 *flags, __u64 *offset,
+				u32 *maj, u32 *min,
+				__u64 *inode,
+				ssize_t pathname_size, char *pathname)
+{
+	__u64 temp;
+	int ch;
+	char *start_pathname = pathname;
+
+	if (io__get_hex(io, start) != '-')
+		return false;
+	if (io__get_hex(io, end) != ' ')
+		return false;
+
+	/* map protection and flags bits */
+	*prot = 0;
+	ch = io__get_char(io);
+	if (ch == 'r')
+		*prot |= PROT_READ;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'w')
+		*prot |= PROT_WRITE;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'x')
+		*prot |= PROT_EXEC;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 's')
+		*flags = MAP_SHARED;
+	else if (ch == 'p')
+		*flags = MAP_PRIVATE;
+	else
+		return false;
+	if (io__get_char(io) != ' ')
+		return false;
+
+	if (io__get_hex(io, offset) != ' ')
+		return false;
+
+	if (io__get_hex(io, &temp) != ':')
+		return false;
+	*maj = temp;
+	if (io__get_hex(io, &temp) != ' ')
+		return false;
+	*min = temp;
+
+	ch = io__get_dec(io, inode);
+	if (ch != ' ') {
+		*pathname = '\0';
+		return ch == '\n';
+	}
+	do {
+		ch = io__get_char(io);
+	} while (ch == ' ');
+	while (true) {
+		if (ch < 0)
+			return false;
+		if (ch == '\0' || ch == '\n' ||
+		    (pathname + 1 - start_pathname) >= pathname_size) {
+			*pathname = '\0';
+			return true;
+		}
+		*pathname++ = ch;
+		ch = io__get_char(io);
+	}
+}
+
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
@@ -280,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       struct machine *machine,
 				       bool mmap_data)
 {
-	FILE *fp;
 	unsigned long long t;
 	char bf[BUFSIZ];
+	struct io io;
 	bool truncation = false;
 	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
@@ -295,28 +369,39 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
 		machine->root_dir, pid, pid);
 
-	fp = fopen(bf, "r");
-	if (fp == NULL) {
+	io.fd = open(bf, O_RDONLY, 0);
+	if (io.fd < 0) {
 		/*
 		 * We raced with a task exiting - just return:
 		 */
 		pr_debug("couldn't open %s\n", bf);
 		return -1;
 	}
+	io__init(&io, io.fd, bf, sizeof(bf));
 
 	event->header.type = PERF_RECORD_MMAP2;
 	t = rdclock();
 
-	while (1) {
-		char prot[5];
-		char execname[PATH_MAX];
-		char anonstr[] = "//anon";
-		unsigned int ino;
+	while (!io.eof) {
+		static const char anonstr[] = "//anon";
 		size_t size;
-		ssize_t n;
 
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			break;
+		/* ensure null termination since stack will be reused. */
+		event->mmap2.filename[0] = '\0';
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		if (!read_proc_maps_line(&io,
+					&event->mmap2.start,
+					&event->mmap2.len,
+					&event->mmap2.prot,
+					&event->mmap2.flags,
+					&event->mmap2.pgoff,
+					&event->mmap2.maj,
+					&event->mmap2.min,
+					&event->mmap2.ino,
+					sizeof(event->mmap2.filename),
+					event->mmap2.filename))
+			continue;
 
 		if ((rdclock() - t) > timeout) {
 			pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
@@ -327,23 +412,6 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			goto out;
 		}
 
-		/* ensure null termination since stack will be reused. */
-		strcpy(execname, "");
-
-		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
-		       &event->mmap2.start, &event->mmap2.len, prot,
-		       &event->mmap2.pgoff, &event->mmap2.maj,
-		       &event->mmap2.min,
-		       &ino, execname);
-
-		/*
- 		 * Anon maps don't have the execname.
- 		 */
-		if (n < 7)
-			continue;
-
-		event->mmap2.ino = (u64)ino;
 		event->mmap2.ino_generation = 0;
 
 		/*
@@ -354,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		else
 			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
 
-		/* map protection and flags bits */
-		event->mmap2.prot = 0;
-		event->mmap2.flags = 0;
-		if (prot[0] == 'r')
-			event->mmap2.prot |= PROT_READ;
-		if (prot[1] == 'w')
-			event->mmap2.prot |= PROT_WRITE;
-		if (prot[2] == 'x')
-			event->mmap2.prot |= PROT_EXEC;
-
-		if (prot[3] == 's')
-			event->mmap2.flags |= MAP_SHARED;
-		else
-			event->mmap2.flags |= MAP_PRIVATE;
-
-		if (prot[2] != 'x') {
-			if (!mmap_data || prot[0] != 'r')
+		if ((event->mmap2.prot & PROT_EXEC) == 0) {
+			if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
 				continue;
 
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -380,17 +433,17 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (truncation)
 			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
 
-		if (!strcmp(execname, ""))
-			strcpy(execname, anonstr);
+		if (!strcmp(event->mmap2.filename, ""))
+			strcpy(event->mmap2.filename, anonstr);
 
 		if (hugetlbfs_mnt_len &&
-		    !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
-			strcpy(execname, anonstr);
+		    !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+			     hugetlbfs_mnt_len)) {
+			strcpy(event->mmap2.filename, anonstr);
 			event->mmap2.flags |= MAP_HUGETLB;
 		}
 
-		size = strlen(execname) + 1;
-		memcpy(event->mmap2.filename, execname, size);
+		size = strlen(event->mmap2.filename) + 1;
 		size = PERF_ALIGN(size, sizeof(u64));
 		event->mmap2.len -= event->mmap.start;
 		event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -409,7 +462,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			break;
 	}
 
-	fclose(fp);
+	close(io.fd);
 	return rc;
 }
 
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v5 0/3] perf synthetic events
  2020-04-15  5:40 [PATCH v5 0/3] perf synthetic events Ian Rogers
                   ` (2 preceding siblings ...)
  2020-04-15  5:40 ` [PATCH v5 3/3] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
@ 2020-04-16 14:19 ` Namhyung Kim
  2020-04-23 14:24   ` Arnaldo Carvalho de Melo
  3 siblings, 1 reply; 11+ messages in thread
From: Namhyung Kim @ 2020-04-16 14:19 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

Hi Ian,

On Wed, Apr 15, 2020 at 2:40 PM Ian Rogers <irogers@google.com> wrote:
>
> Add a multi-threaded version of the internals synthesize benchmark. It
> attempts to compute a time per event synthesized, but as it is reading
> /proc there are issues:
>  - permissions if not run as root
>  - "random" nature of /proc
>
> By default the benchmark is disabled but can be enabled with a
> flag. It has been useful in gauging the value of multi-threaded
> improvements not included here as their value appears minimal.
>
> The patch set includes 2 patches that improve synthesis performance
> and updates the benchmark numbers:
> https://lore.kernel.org/lkml/20200411064248.247530-1-irogers@google.com/
>
> v4 added a missing test file
> v3 improved documentation, return values and added testing to the io framework
>    following feedback from namhyung@kernel.org.
> v2 addressed single threaded synthesize benchmark issues from jolsa@redhat.com
> https://lore.kernel.org/lkml/20200402154357.107873-1-irogers@google.com/
>
> Ian Rogers (3):
>   perf bench: add a multi-threaded synthesize benchmark
>   tools api: add a lightweight buffered reading api
>   perf synthetic events: Remove use of sscanf from /proc reading

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks
Namhyung

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark
  2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
@ 2020-04-23 14:17   ` Arnaldo Carvalho de Melo
  2020-05-08 13:05   ` [tip: perf/core] perf bench: Add " tip-bot2 for Ian Rogers
  1 sibling, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-04-23 14:17 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, Petr Mladek, Andrey Zhizhikin,
	Kefeng Wang, Thomas Gleixner, Kan Liang, linux-kernel,
	linux-perf-users, Stephane Eranian

Em Tue, Apr 14, 2020 at 10:40:48PM -0700, Ian Rogers escreveu:
> By default this isn't run as it reads /proc and may not have access.
> For consistency, modify the single threaded benchmark to compute an
> average time per event.

Thanks, applied,

- Arnaldo
 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/bench/synthesize.c | 211 ++++++++++++++++++++++++++++++----
>  1 file changed, 186 insertions(+), 25 deletions(-)
> 
> diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
> index 6291257bc9c9..8d624aea1c5e 100644
> --- a/tools/perf/bench/synthesize.c
> +++ b/tools/perf/bench/synthesize.c
> @@ -10,60 +10,105 @@
>  #include "bench.h"
>  #include "../util/debug.h"
>  #include "../util/session.h"
> +#include "../util/stat.h"
>  #include "../util/synthetic-events.h"
>  #include "../util/target.h"
>  #include "../util/thread_map.h"
>  #include "../util/tool.h"
> +#include "../util/util.h"
> +#include <linux/atomic.h>
>  #include <linux/err.h>
>  #include <linux/time64.h>
>  #include <subcmd/parse-options.h>
>  
> -static unsigned int iterations = 10000;
> +static unsigned int min_threads = 1;
> +static unsigned int max_threads = UINT_MAX;
> +static unsigned int single_iterations = 10000;
> +static unsigned int multi_iterations = 10;
> +static bool run_st;
> +static bool run_mt;
>  
>  static const struct option options[] = {
> -	OPT_UINTEGER('i', "iterations", &iterations,
> -		"Number of iterations used to compute average"),
> +	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
> +	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
> +	OPT_UINTEGER('m', "min-threads", &min_threads,
> +		"Minimum number of threads in multithreaded bench"),
> +	OPT_UINTEGER('M', "max-threads", &max_threads,
> +		"Maximum number of threads in multithreaded bench"),
> +	OPT_UINTEGER('i', "single-iterations", &single_iterations,
> +		"Number of iterations used to compute single-threaded average"),
> +	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
> +		"Number of iterations used to compute multi-threaded average"),
>  	OPT_END()
>  };
>  
> -static const char *const usage[] = {
> +static const char *const bench_usage[] = {
>  	"perf bench internals synthesize <options>",
>  	NULL
>  };
>  
> +static atomic_t event_count;
>  
> -static int do_synthesize(struct perf_session *session,
> -			struct perf_thread_map *threads,
> -			struct target *target, bool data_mmap)
> +static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
> +				     union perf_event *event __maybe_unused,
> +				     struct perf_sample *sample __maybe_unused,
> +				     struct machine *machine __maybe_unused)
> +{
> +	atomic_inc(&event_count);
> +	return 0;
> +}
> +
> +static int do_run_single_threaded(struct perf_session *session,
> +				struct perf_thread_map *threads,
> +				struct target *target, bool data_mmap)
>  {
>  	const unsigned int nr_threads_synthesize = 1;
>  	struct timeval start, end, diff;
>  	u64 runtime_us;
>  	unsigned int i;
> -	double average;
> +	double time_average, time_stddev, event_average, event_stddev;
>  	int err;
> +	struct stats time_stats, event_stats;
>  
> -	gettimeofday(&start, NULL);
> -	for (i = 0; i < iterations; i++) {
> -		err = machine__synthesize_threads(&session->machines.host,
> -						target, threads, data_mmap,
> +	init_stats(&time_stats);
> +	init_stats(&event_stats);
> +
> +	for (i = 0; i < single_iterations; i++) {
> +		atomic_set(&event_count, 0);
> +		gettimeofday(&start, NULL);
> +		err = __machine__synthesize_threads(&session->machines.host,
> +						NULL,
> +						target, threads,
> +						process_synthesized_event,
> +						data_mmap,
>  						nr_threads_synthesize);
>  		if (err)
>  			return err;
> +
> +		gettimeofday(&end, NULL);
> +		timersub(&end, &start, &diff);
> +		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
> +		update_stats(&time_stats, runtime_us);
> +		update_stats(&event_stats, atomic_read(&event_count));
>  	}
>  
> -	gettimeofday(&end, NULL);
> -	timersub(&end, &start, &diff);
> -	runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
> -	average = (double)runtime_us/(double)iterations;
> -	printf("Average %ssynthesis took: %f usec\n",
> -		data_mmap ? "data " : "", average);
> +	time_average = avg_stats(&time_stats);
> +	time_stddev = stddev_stats(&time_stats);
> +	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
> +		data_mmap ? "data " : "", time_average, time_stddev);
> +
> +	event_average = avg_stats(&event_stats);
> +	event_stddev = stddev_stats(&event_stats);
> +	printf("  Average num. events: %.3f (+- %.3f)\n",
> +		event_average, event_stddev);
> +
> +	printf("  Average time per event %.3f usec\n",
> +		time_average / event_average);
>  	return 0;
>  }
>  
> -int bench_synthesize(int argc, const char **argv)
> +static int run_single_threaded(void)
>  {
> -	struct perf_tool tool;
>  	struct perf_session *session;
>  	struct target target = {
>  		.pid = "self",
> @@ -71,8 +116,7 @@ int bench_synthesize(int argc, const char **argv)
>  	struct perf_thread_map *threads;
>  	int err;
>  
> -	argc = parse_options(argc, argv, options, usage, 0);
> -
> +	perf_set_singlethreaded();
>  	session = perf_session__new(NULL, false, NULL);
>  	if (IS_ERR(session)) {
>  		pr_err("Session creation failed.\n");
> @@ -84,13 +128,16 @@ int bench_synthesize(int argc, const char **argv)
>  		err = -ENOMEM;
>  		goto err_out;
>  	}
> -	perf_tool__fill_defaults(&tool);
>  
> -	err = do_synthesize(session, threads, &target, false);
> +	puts(
> +"Computing performance of single threaded perf event synthesis by\n"
> +"synthesizing events on the perf process itself:");
> +
> +	err = do_run_single_threaded(session, threads, &target, false);
>  	if (err)
>  		goto err_out;
>  
> -	err = do_synthesize(session, threads, &target, true);
> +	err = do_run_single_threaded(session, threads, &target, true);
>  
>  err_out:
>  	if (threads)
> @@ -99,3 +146,117 @@ int bench_synthesize(int argc, const char **argv)
>  	perf_session__delete(session);
>  	return err;
>  }
> +
> +static int do_run_multi_threaded(struct target *target,
> +				unsigned int nr_threads_synthesize)
> +{
> +	struct timeval start, end, diff;
> +	u64 runtime_us;
> +	unsigned int i;
> +	double time_average, time_stddev, event_average, event_stddev;
> +	int err;
> +	struct stats time_stats, event_stats;
> +	struct perf_session *session;
> +
> +	init_stats(&time_stats);
> +	init_stats(&event_stats);
> +	for (i = 0; i < multi_iterations; i++) {
> +		session = perf_session__new(NULL, false, NULL);
> +		if (!session)
> +			return -ENOMEM;
> +
> +		atomic_set(&event_count, 0);
> +		gettimeofday(&start, NULL);
> +		err = __machine__synthesize_threads(&session->machines.host,
> +						NULL,
> +						target, NULL,
> +						process_synthesized_event,
> +						false,
> +						nr_threads_synthesize);
> +		if (err) {
> +			perf_session__delete(session);
> +			return err;
> +		}
> +
> +		gettimeofday(&end, NULL);
> +		timersub(&end, &start, &diff);
> +		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
> +		update_stats(&time_stats, runtime_us);
> +		update_stats(&event_stats, atomic_read(&event_count));
> +		perf_session__delete(session);
> +	}
> +
> +	time_average = avg_stats(&time_stats);
> +	time_stddev = stddev_stats(&time_stats);
> +	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
> +		time_average, time_stddev);
> +
> +	event_average = avg_stats(&event_stats);
> +	event_stddev = stddev_stats(&event_stats);
> +	printf("    Average num. events: %.3f (+- %.3f)\n",
> +		event_average, event_stddev);
> +
> +	printf("    Average time per event %.3f usec\n",
> +		time_average / event_average);
> +	return 0;
> +}
> +
> +static int run_multi_threaded(void)
> +{
> +	struct target target = {
> +		.cpu_list = "0"
> +	};
> +	unsigned int nr_threads_synthesize;
> +	int err;
> +
> +	if (max_threads == UINT_MAX)
> +		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
> +
> +	puts(
> +"Computing performance of multi threaded perf event synthesis by\n"
> +"synthesizing events on CPU 0:");
> +
> +	for (nr_threads_synthesize = min_threads;
> +	     nr_threads_synthesize <= max_threads;
> +	     nr_threads_synthesize++) {
> +		if (nr_threads_synthesize == 1)
> +			perf_set_singlethreaded();
> +		else
> +			perf_set_multithreaded();
> +
> +		printf("  Number of synthesis threads: %u\n",
> +			nr_threads_synthesize);
> +
> +		err = do_run_multi_threaded(&target, nr_threads_synthesize);
> +		if (err)
> +			return err;
> +	}
> +	perf_set_singlethreaded();
> +	return 0;
> +}
> +
> +int bench_synthesize(int argc, const char **argv)
> +{
> +	int err = 0;
> +
> +	argc = parse_options(argc, argv, options, bench_usage, 0);
> +	if (argc) {
> +		usage_with_options(bench_usage, options);
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	/*
> +	 * If neither single threaded nor multi-threaded is specified, default
> +	 * to running just single threaded.
> +	 */
> +	if (!run_st && !run_mt)
> +		run_st = true;
> +
> +	if (run_st)
> +		err = run_single_threaded();
> +
> +	if (!err && run_mt)
> +		err = run_multi_threaded();
> +
> +	return err;
> +}
> -- 
> 2.26.0.110.g2183baf09c-goog
> 

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v5 0/3] perf synthetic events
  2020-04-16 14:19 ` [PATCH v5 0/3] perf synthetic events Namhyung Kim
@ 2020-04-23 14:24   ` Arnaldo Carvalho de Melo
  2020-04-24  7:42     ` Jiri Olsa
  0 siblings, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2020-04-23 14:24 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Namhyung Kim, Ian Rogers, Peter Zijlstra, Ingo Molnar,
	Mark Rutland, Alexander Shishkin, Petr Mladek, Andrey Zhizhikin,
	Kefeng Wang, Thomas Gleixner, Kan Liang, linux-kernel,
	linux-perf-users, Stephane Eranian

Em Thu, Apr 16, 2020 at 11:19:19PM +0900, Namhyung Kim escreveu:
> On Wed, Apr 15, 2020 at 2:40 PM Ian Rogers <irogers@google.com> wrote:
> > v4 added a missing test file
> > v3 improved documentation, return values and added testing to the io framework
> >    following feedback from namhyung@kernel.org.
> > v2 addressed single threaded synthesize benchmark issues from jolsa@redhat.com
> > https://lore.kernel.org/lkml/20200402154357.107873-1-irogers@google.com/
> >
> > Ian Rogers (3):
> >   perf bench: add a multi-threaded synthesize benchmark
> >   tools api: add a lightweight buffered reading api
> >   perf synthetic events: Remove use of sscanf from /proc reading
> 
> Acked-by: Namhyung Kim <namhyung@kernel.org>

Jiri, you seem to be ok with it, can I get your Acked-by or Reviewed-by
as well?

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v5 0/3] perf synthetic events
  2020-04-23 14:24   ` Arnaldo Carvalho de Melo
@ 2020-04-24  7:42     ` Jiri Olsa
  0 siblings, 0 replies; 11+ messages in thread
From: Jiri Olsa @ 2020-04-24  7:42 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Namhyung Kim, Ian Rogers, Peter Zijlstra, Ingo Molnar,
	Mark Rutland, Alexander Shishkin, Petr Mladek, Andrey Zhizhikin,
	Kefeng Wang, Thomas Gleixner, Kan Liang, linux-kernel,
	linux-perf-users, Stephane Eranian

On Thu, Apr 23, 2020 at 11:24:31AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Apr 16, 2020 at 11:19:19PM +0900, Namhyung Kim escreveu:
> > On Wed, Apr 15, 2020 at 2:40 PM Ian Rogers <irogers@google.com> wrote:
> > > v4 added a missing test file
> > > v3 improved documentation, return values and added testing to the io framework
> > >    following feedback from namhyung@kernel.org.
> > > v2 addressed single threaded synthesize benchmark issues from jolsa@redhat.com
> > > https://lore.kernel.org/lkml/20200402154357.107873-1-irogers@google.com/
> > >
> > > Ian Rogers (3):
> > >   perf bench: add a multi-threaded synthesize benchmark
> > >   tools api: add a lightweight buffered reading api
> > >   perf synthetic events: Remove use of sscanf from /proc reading
> > 
> > Acked-by: Namhyung Kim <namhyung@kernel.org>
> 
> Jiri, you seem to be ok with it, can I get your Acked-by or Reviewed-by
> as well?

yep, it's good for me.. and I just found out that the new
version has tests for io functions, great! :-)

Acked-by: Jiri Olsa <jolsa@redhat.com>

thanks,
jirka


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tip: perf/core] tools api: Add a lightweight buffered reading api
  2020-04-15  5:40 ` [PATCH v5 2/3] tools api: add a lightweight buffered reading api Ian Rogers
@ 2020-05-08 13:05   ` tip-bot2 for Ian Rogers
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Ian Rogers @ 2020-05-08 13:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ian Rogers, Jiri Olsa, Namhyung Kim, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Andrey Zhizhikin, Kan Liang, Kefeng Wang,
	Mark Rutland, Peter Zijlstra, Petr Mladek, Stephane Eranian,
	Thomas Gleixner, x86, LKML

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     e95770af4c4a280fab2080529d30452a7628d45d
Gitweb:        https://git.kernel.org/tip/e95770af4c4a280fab2080529d30452a7628d45d
Author:        Ian Rogers <irogers@google.com>
AuthorDate:    Tue, 14 Apr 2020 22:40:49 -07:00
Committer:     Arnaldo Carvalho de Melo <acme@redhat.com>
CommitterDate: Thu, 30 Apr 2020 10:48:28 -03:00

tools api: Add a lightweight buffered reading api

The synthesize benchmark shows the majority of execution time going to
fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
reading library that will be used to replace these calls in a follow-up
CL. Add tests for the library to perf test.

Committer tests:

  $ perf test api
  63: Test api io                                           : Ok
  $

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200415054050.31645-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/io.h              | 112 +++++++++++-
 tools/perf/tests/Build          |   1 +-
 tools/perf/tests/api-io.c       | 304 +++++++++++++++++++++++++++++++-
 tools/perf/tests/builtin-test.c |   4 +-
 tools/perf/tests/tests.h        |   1 +-
 5 files changed, 422 insertions(+)
 create mode 100644 tools/lib/api/io.h
 create mode 100644 tools/perf/tests/api-io.c

diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 0000000..b7e55b5
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+struct io {
+	/* File descriptor being read. */
+	int fd;
+	/* Size of the read buffer. */
+	unsigned int buf_len;
+	/* Pointer to storage for buffering read. */
+	char *buf;
+	/* End of the storage. */
+	char *end;
+	/* Currently accessed data pointer. */
+	char *data;
+	/* Set true on end of file or on read error. */
+	bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+			    char *buf, unsigned int buf_len)
+{
+	io->fd = fd;
+	io->buf_len = buf_len;
+	io->buf = buf;
+	io->end = buf;
+	io->data = buf;
+	io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+	char *ptr = io->data;
+
+	if (io->eof)
+		return -1;
+
+	if (ptr == io->end) {
+		ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+		if (n <= 0) {
+			io->eof = true;
+			return -1;
+		}
+		ptr = &io->buf[0];
+		io->end = &io->buf[n];
+	}
+	io->data = ptr + 1;
+	return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
+ * returns the character after the hexadecimal value which may be -1 for eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+	bool first_read = true;
+
+	*hex = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*hex = (*hex << 4) | (ch - '0');
+		else if (ch >= 'a' && ch <= 'f')
+			*hex = (*hex << 4) | (ch - 'a' + 10);
+		else if (ch >= 'A' && ch <= 'F')
+			*hex = (*hex << 4) | (ch - 'A' + 10);
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+/* Read a positive decimal value with out argument dec. If the first character
+ * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
+ * character after the decimal value which may be -1 for eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+	bool first_read = true;
+
+	*dec = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*dec = (*dec * 10) + ch - '0';
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+#endif /* __API_IO__ */
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index b3d1bf1..c75557a 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -56,6 +56,7 @@ perf-y += mem2node.o
 perf-y += maps.o
 perf-y += time-utils-test.o
 perf-y += genelf.o
+perf-y += api-io.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 0000000..2ada86a
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected)                             \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %d != %d\n",			\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected)                           \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %lld != %lld\n",		\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+	ssize_t contents_len = strlen(contents);
+	int fd;
+
+	strcpy(path, TEMPL);
+	fd = mkstemp(path);
+	if (fd < 0) {
+		pr_debug("mkstemp failed");
+		return -1;
+	}
+	if (write(fd, contents, contents_len) < contents_len) {
+		pr_debug("short write");
+		close(fd);
+		unlink(path);
+		return -1;
+	}
+	close(fd);
+	return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+		      size_t buf_size, struct io *io)
+{
+	if (make_test_file(path, contents))
+		return -1;
+
+	io->fd = open(path, O_RDONLY);
+	if (io->fd < 0) {
+		pr_debug("Failed to open '%s'\n", path);
+		unlink(path);
+		return -1;
+	}
+	io->buf = malloc(buf_size);
+	if (io->buf == NULL) {
+		pr_debug("Failed to allocate memory");
+		close(io->fd);
+		unlink(path);
+		return -1;
+	}
+	io__init(io, io->fd, io->buf, buf_size);
+	return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+	free(io->buf);
+	close(io->fd);
+	unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	size_t i;
+
+	if (setup_test(path, test_string, buf_size, &io))
+		return -1;
+
+	for (i = 0; i < strlen(test_string); i++) {
+		ch = io__get_char(&io);
+
+		EXPECT_EQUAL(ch, test_string[i]);
+		EXPECT_EQUAL(io.eof, false);
+	}
+	ch = io__get_char(&io);
+	EXPECT_EQUAL(ch, -1);
+	EXPECT_EQUAL(io.eof, true);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_char(void)
+{
+	int i, ret = 0;
+	size_t j;
+
+	static const char *const test_strings[] = {
+		"12345678abcdef90",
+		"a\nb\nc\nd\n",
+		"\a\b\t\v\f\r",
+	};
+	for (i = 0; i <= 10; i++) {
+		for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+			if (do_test_get_char(test_strings[j], 1 << i))
+				ret = -1;
+		}
+	}
+	return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 hex;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_hex(void)
+{
+	int ret = 0;
+
+	if (do_test_get_hex("12345678abcdef90",
+				0x12345678abcdef90, -1,
+				0, -1,
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("12345678ABCDEF90;a;b",
+				0x12345678abcdef90, ';',
+				0xa, ';',
+				0xb, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+				0xabcdef, 'g',
+				0, -2,
+				0x99, 'i',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 dec;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_dec(void)
+{
+	int ret = 0;
+
+	if (do_test_get_dec("12345678abcdef90",
+				12345678, 'a',
+				0, -2,
+				0, -2,
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("12345678;1;2",
+				12345678, ';',
+				1, ';',
+				2, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+				123456789, 'a',
+				0, -2,
+				99, 'c',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+		int subtest __maybe_unused)
+{
+	int ret = 0;
+
+	if (test_get_char())
+		ret = TEST_FAIL;
+	if (test_get_hex())
+		ret = TEST_FAIL;
+	if (test_get_dec())
+		ret = TEST_FAIL;
+	return ret;
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b6322eb..3471ec5 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -310,6 +310,10 @@ static struct test generic_tests[] = {
 		.func = test__jit_write_elf,
 	},
 	{
+		.desc = "Test api io",
+		.func = test__api_io,
+	},
+	{
 		.desc = "maps__merge_in",
 		.func = test__maps__merge_in,
 	},
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 61a1ab0..d6d4ac3 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -112,6 +112,7 @@ int test__mem2node(struct test *t, int subtest);
 int test__maps__merge_in(struct test *t, int subtest);
 int test__time_utils(struct test *t, int subtest);
 int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: perf/core] perf synthetic events: Remove use of sscanf from /proc reading
  2020-04-15  5:40 ` [PATCH v5 3/3] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
@ 2020-05-08 13:05   ` tip-bot2 for Ian Rogers
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Ian Rogers @ 2020-05-08 13:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ian Rogers, Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim,
	Alexander Shishkin, Andrey Zhizhikin, Kan Liang, Kefeng Wang,
	Mark Rutland, Peter Zijlstra, Petr Mladek, Stephane Eranian,
	Thomas Gleixner, x86, LKML

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     2069425eb3f8257e6e73548030fe65d5f0faca0d
Gitweb:        https://git.kernel.org/tip/2069425eb3f8257e6e73548030fe65d5f0faca0d
Author:        Ian Rogers <irogers@google.com>
AuthorDate:    Tue, 14 Apr 2020 22:40:50 -07:00
Committer:     Arnaldo Carvalho de Melo <acme@redhat.com>
CommitterDate: Thu, 30 Apr 2020 10:48:29 -03:00

perf synthetic events: Remove use of sscanf from /proc reading

The synthesize benchmark, run on a single process and thread, shows
perf_event__synthesize_mmap_events as the hottest function with fgets
and sscanf taking the majority of execution time.

fscanf performs similarly well. Replace the scanf call with manual
reading of each field of the /proc/pid/maps line, and remove some
unnecessary buffering.

This change also addresses potential, but unlikely, buffer overruns for
the string values read by scanf.

Performance before is:

  $ sudo perf bench internals synthesize -m 16 -M 16 -s -t
  \# Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 102.810 usec (+- 0.027 usec)
    Average num. events: 17.000 (+- 0.000)
    Average time per event 6.048 usec
    Average data synthesis took: 106.325 usec (+- 0.018 usec)
    Average num. events: 89.000 (+- 0.000)
    Average time per event 1.195 usec
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 16
      Average synthesis took: 68103.100 usec (+- 441.234 usec)
      Average num. events: 30703.000 (+- 0.730)
      Average time per event 2.218 usec

And after is:

  $ sudo perf bench internals synthesize -m 16 -M 16 -s -t
  \# Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 50.388 usec (+- 0.031 usec)
    Average num. events: 17.000 (+- 0.000)
    Average time per event 2.964 usec
    Average data synthesis took: 52.693 usec (+- 0.020 usec)
    Average num. events: 89.000 (+- 0.000)
    Average time per event 0.592 usec
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 16
      Average synthesis took: 45022.400 usec (+- 552.740 usec)
      Average num. events: 30624.200 (+- 10.037)
      Average time per event 1.470 usec

On an Intel Xeon 6154 compiling with Debian gcc 9.2.1.

Committer testing:

On an AMD Ryzen 5 3600X 6-Core Processor:

Before:

  # perf bench internals synthesize --min-threads 12 --max-threads 12 --st --mt
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 267.491 usec (+- 0.176 usec)
    Average num. events: 56.000 (+- 0.000)
    Average time per event 4.777 usec
    Average data synthesis took: 277.257 usec (+- 0.169 usec)
    Average num. events: 287.000 (+- 0.000)
    Average time per event 0.966 usec
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 12
      Average synthesis took: 81599.500 usec (+- 346.315 usec)
      Average num. events: 36096.100 (+- 2.523)
      Average time per event 2.261 usec
  #

After:

  # perf bench internals synthesize --min-threads 12 --max-threads 12 --st --mt
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 110.125 usec (+- 0.080 usec)
    Average num. events: 56.000 (+- 0.000)
    Average time per event 1.967 usec
    Average data synthesis took: 118.518 usec (+- 0.057 usec)
    Average num. events: 287.000 (+- 0.000)
    Average time per event 0.413 usec
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 12
      Average synthesis took: 43490.700 usec (+- 284.527 usec)
      Average num. events: 37028.500 (+- 0.563)
      Average time per event 1.175 usec
  #

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200415054050.31645-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/synthetic-events.c | 157 ++++++++++++++++++----------
 1 file changed, 105 insertions(+), 52 deletions(-)

diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 9d4aa95..1ea9ada 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include <api/fs/fs.h>
+#include <api/io.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -273,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+				u32 *prot, u32 *flags, __u64 *offset,
+				u32 *maj, u32 *min,
+				__u64 *inode,
+				ssize_t pathname_size, char *pathname)
+{
+	__u64 temp;
+	int ch;
+	char *start_pathname = pathname;
+
+	if (io__get_hex(io, start) != '-')
+		return false;
+	if (io__get_hex(io, end) != ' ')
+		return false;
+
+	/* map protection and flags bits */
+	*prot = 0;
+	ch = io__get_char(io);
+	if (ch == 'r')
+		*prot |= PROT_READ;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'w')
+		*prot |= PROT_WRITE;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'x')
+		*prot |= PROT_EXEC;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 's')
+		*flags = MAP_SHARED;
+	else if (ch == 'p')
+		*flags = MAP_PRIVATE;
+	else
+		return false;
+	if (io__get_char(io) != ' ')
+		return false;
+
+	if (io__get_hex(io, offset) != ' ')
+		return false;
+
+	if (io__get_hex(io, &temp) != ':')
+		return false;
+	*maj = temp;
+	if (io__get_hex(io, &temp) != ' ')
+		return false;
+	*min = temp;
+
+	ch = io__get_dec(io, inode);
+	if (ch != ' ') {
+		*pathname = '\0';
+		return ch == '\n';
+	}
+	do {
+		ch = io__get_char(io);
+	} while (ch == ' ');
+	while (true) {
+		if (ch < 0)
+			return false;
+		if (ch == '\0' || ch == '\n' ||
+		    (pathname + 1 - start_pathname) >= pathname_size) {
+			*pathname = '\0';
+			return true;
+		}
+		*pathname++ = ch;
+		ch = io__get_char(io);
+	}
+}
+
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
@@ -280,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       struct machine *machine,
 				       bool mmap_data)
 {
-	FILE *fp;
 	unsigned long long t;
 	char bf[BUFSIZ];
+	struct io io;
 	bool truncation = false;
 	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
@@ -295,28 +369,39 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
 		machine->root_dir, pid, pid);
 
-	fp = fopen(bf, "r");
-	if (fp == NULL) {
+	io.fd = open(bf, O_RDONLY, 0);
+	if (io.fd < 0) {
 		/*
 		 * We raced with a task exiting - just return:
 		 */
 		pr_debug("couldn't open %s\n", bf);
 		return -1;
 	}
+	io__init(&io, io.fd, bf, sizeof(bf));
 
 	event->header.type = PERF_RECORD_MMAP2;
 	t = rdclock();
 
-	while (1) {
-		char prot[5];
-		char execname[PATH_MAX];
-		char anonstr[] = "//anon";
-		unsigned int ino;
+	while (!io.eof) {
+		static const char anonstr[] = "//anon";
 		size_t size;
-		ssize_t n;
 
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			break;
+		/* ensure null termination since stack will be reused. */
+		event->mmap2.filename[0] = '\0';
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		if (!read_proc_maps_line(&io,
+					&event->mmap2.start,
+					&event->mmap2.len,
+					&event->mmap2.prot,
+					&event->mmap2.flags,
+					&event->mmap2.pgoff,
+					&event->mmap2.maj,
+					&event->mmap2.min,
+					&event->mmap2.ino,
+					sizeof(event->mmap2.filename),
+					event->mmap2.filename))
+			continue;
 
 		if ((rdclock() - t) > timeout) {
 			pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
@@ -327,23 +412,6 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			goto out;
 		}
 
-		/* ensure null termination since stack will be reused. */
-		strcpy(execname, "");
-
-		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
-		       &event->mmap2.start, &event->mmap2.len, prot,
-		       &event->mmap2.pgoff, &event->mmap2.maj,
-		       &event->mmap2.min,
-		       &ino, execname);
-
-		/*
- 		 * Anon maps don't have the execname.
- 		 */
-		if (n < 7)
-			continue;
-
-		event->mmap2.ino = (u64)ino;
 		event->mmap2.ino_generation = 0;
 
 		/*
@@ -354,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		else
 			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
 
-		/* map protection and flags bits */
-		event->mmap2.prot = 0;
-		event->mmap2.flags = 0;
-		if (prot[0] == 'r')
-			event->mmap2.prot |= PROT_READ;
-		if (prot[1] == 'w')
-			event->mmap2.prot |= PROT_WRITE;
-		if (prot[2] == 'x')
-			event->mmap2.prot |= PROT_EXEC;
-
-		if (prot[3] == 's')
-			event->mmap2.flags |= MAP_SHARED;
-		else
-			event->mmap2.flags |= MAP_PRIVATE;
-
-		if (prot[2] != 'x') {
-			if (!mmap_data || prot[0] != 'r')
+		if ((event->mmap2.prot & PROT_EXEC) == 0) {
+			if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
 				continue;
 
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -380,17 +433,17 @@ out:
 		if (truncation)
 			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
 
-		if (!strcmp(execname, ""))
-			strcpy(execname, anonstr);
+		if (!strcmp(event->mmap2.filename, ""))
+			strcpy(event->mmap2.filename, anonstr);
 
 		if (hugetlbfs_mnt_len &&
-		    !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
-			strcpy(execname, anonstr);
+		    !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+			     hugetlbfs_mnt_len)) {
+			strcpy(event->mmap2.filename, anonstr);
 			event->mmap2.flags |= MAP_HUGETLB;
 		}
 
-		size = strlen(execname) + 1;
-		memcpy(event->mmap2.filename, execname, size);
+		size = strlen(event->mmap2.filename) + 1;
 		size = PERF_ALIGN(size, sizeof(u64));
 		event->mmap2.len -= event->mmap.start;
 		event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -409,7 +462,7 @@ out:
 			break;
 	}
 
-	fclose(fp);
+	close(io.fd);
 	return rc;
 }
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: perf/core] perf bench: Add a multi-threaded synthesize benchmark
  2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
  2020-04-23 14:17   ` Arnaldo Carvalho de Melo
@ 2020-05-08 13:05   ` tip-bot2 for Ian Rogers
  1 sibling, 0 replies; 11+ messages in thread
From: tip-bot2 for Ian Rogers @ 2020-05-08 13:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ian Rogers, Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim,
	Alexander Shishkin, Andrey Zhizhikin, Kan Liang, Kefeng Wang,
	Mark Rutland, Peter Zijlstra, Petr Mladek, Stephane Eranian,
	Thomas Gleixner, x86, LKML

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     13edc237200c75425ab0e3fe4b4c75dafb468c2e
Gitweb:        https://git.kernel.org/tip/13edc237200c75425ab0e3fe4b4c75dafb468c2e
Author:        Ian Rogers <irogers@google.com>
AuthorDate:    Tue, 14 Apr 2020 22:40:48 -07:00
Committer:     Arnaldo Carvalho de Melo <acme@redhat.com>
CommitterDate: Thu, 30 Apr 2020 10:48:25 -03:00

perf bench: Add a multi-threaded synthesize benchmark

By default the multi-threaded benchmark isn't run, as it reads /proc and
perf may not have permission to access it. For consistency, modify the
single-threaded benchmark to also compute an average time per event.

Committer testing:

  $ grep -m1 "model name" /proc/cpuinfo
  model name	: Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
  $ grep "model name" /proc/cpuinfo  | wc -l
  8
  $
  $ perf bench internals synthesize -h
  # Running 'internals/synthesize' benchmark:

   Usage: perf bench internals synthesize <options>

      -I, --multi-iterations <n>
                            Number of iterations used to compute multi-threaded average
      -i, --single-iterations <n>
                            Number of iterations used to compute single-threaded average
      -M, --max-threads <n>
                            Maximum number of threads in multithreaded bench
      -m, --min-threads <n>
                            Minimum number of threads in multithreaded bench
      -s, --st              Run single threaded benchmark
      -t, --mt              Run multi-threaded benchmark

  $
  $ perf bench internals synthesize -t
  # Running 'internals/synthesize' benchmark:
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 1
      Average synthesis took: 65449.000 usec (+- 586.442 usec)
      Average num. events: 9405.400 (+- 0.306)
      Average time per event 6.959 usec
    Number of synthesis threads: 2
      Average synthesis took: 37838.300 usec (+- 130.259 usec)
      Average num. events: 9501.800 (+- 20.469)
      Average time per event 3.982 usec
    Number of synthesis threads: 3
      Average synthesis took: 48551.400 usec (+- 225.686 usec)
      Average num. events: 9544.000 (+- 0.000)
      Average time per event 5.087 usec
    Number of synthesis threads: 4
      Average synthesis took: 29632.500 usec (+- 50.808 usec)
      Average num. events: 9544.000 (+- 0.000)
      Average time per event 3.105 usec
    Number of synthesis threads: 5
      Average synthesis took: 33920.400 usec (+- 284.509 usec)
      Average num. events: 9544.000 (+- 0.000)
      Average time per event 3.554 usec
    Number of synthesis threads: 6
      Average synthesis took: 27604.100 usec (+- 72.344 usec)
      Average num. events: 9548.000 (+- 0.000)
      Average time per event 2.891 usec
    Number of synthesis threads: 7
      Average synthesis took: 25406.300 usec (+- 933.371 usec)
      Average num. events: 9545.500 (+- 0.167)
      Average time per event 2.662 usec
    Number of synthesis threads: 8
      Average synthesis took: 24110.400 usec (+- 73.229 usec)
      Average num. events: 9551.000 (+- 0.000)
      Average time per event 2.524 usec
  $

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200415054050.31645-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/synthesize.c | 211 +++++++++++++++++++++++++++++----
 1 file changed, 186 insertions(+), 25 deletions(-)

diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 6291257..8d624ae 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -10,60 +10,105 @@
 #include "bench.h"
 #include "../util/debug.h"
 #include "../util/session.h"
+#include "../util/stat.h"
 #include "../util/synthetic-events.h"
 #include "../util/target.h"
 #include "../util/thread_map.h"
 #include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
 #include <linux/err.h>
 #include <linux/time64.h>
 #include <subcmd/parse-options.h>
 
-static unsigned int iterations = 10000;
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
 
 static const struct option options[] = {
-	OPT_UINTEGER('i', "iterations", &iterations,
-		"Number of iterations used to compute average"),
+	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+	OPT_UINTEGER('m', "min-threads", &min_threads,
+		"Minimum number of threads in multithreaded bench"),
+	OPT_UINTEGER('M', "max-threads", &max_threads,
+		"Maximum number of threads in multithreaded bench"),
+	OPT_UINTEGER('i', "single-iterations", &single_iterations,
+		"Number of iterations used to compute single-threaded average"),
+	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+		"Number of iterations used to compute multi-threaded average"),
 	OPT_END()
 };
 
-static const char *const usage[] = {
+static const char *const bench_usage[] = {
 	"perf bench internals synthesize <options>",
 	NULL
 };
 
+static atomic_t event_count;
 
-static int do_synthesize(struct perf_session *session,
-			struct perf_thread_map *threads,
-			struct target *target, bool data_mmap)
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	atomic_inc(&event_count);
+	return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+				struct perf_thread_map *threads,
+				struct target *target, bool data_mmap)
 {
 	const unsigned int nr_threads_synthesize = 1;
 	struct timeval start, end, diff;
 	u64 runtime_us;
 	unsigned int i;
-	double average;
+	double time_average, time_stddev, event_average, event_stddev;
 	int err;
+	struct stats time_stats, event_stats;
 
-	gettimeofday(&start, NULL);
-	for (i = 0; i < iterations; i++) {
-		err = machine__synthesize_threads(&session->machines.host,
-						target, threads, data_mmap,
+	init_stats(&time_stats);
+	init_stats(&event_stats);
+
+	for (i = 0; i < single_iterations; i++) {
+		atomic_set(&event_count, 0);
+		gettimeofday(&start, NULL);
+		err = __machine__synthesize_threads(&session->machines.host,
+						NULL,
+						target, threads,
+						process_synthesized_event,
+						data_mmap,
 						nr_threads_synthesize);
 		if (err)
 			return err;
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+		update_stats(&event_stats, atomic_read(&event_count));
 	}
 
-	gettimeofday(&end, NULL);
-	timersub(&end, &start, &diff);
-	runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
-	average = (double)runtime_us/(double)iterations;
-	printf("Average %ssynthesis took: %f usec\n",
-		data_mmap ? "data " : "", average);
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+		data_mmap ? "data " : "", time_average, time_stddev);
+
+	event_average = avg_stats(&event_stats);
+	event_stddev = stddev_stats(&event_stats);
+	printf("  Average num. events: %.3f (+- %.3f)\n",
+		event_average, event_stddev);
+
+	printf("  Average time per event %.3f usec\n",
+		time_average / event_average);
 	return 0;
 }
 
-int bench_synthesize(int argc, const char **argv)
+static int run_single_threaded(void)
 {
-	struct perf_tool tool;
 	struct perf_session *session;
 	struct target target = {
 		.pid = "self",
@@ -71,8 +116,7 @@ int bench_synthesize(int argc, const char **argv)
 	struct perf_thread_map *threads;
 	int err;
 
-	argc = parse_options(argc, argv, options, usage, 0);
-
+	perf_set_singlethreaded();
 	session = perf_session__new(NULL, false, NULL);
 	if (IS_ERR(session)) {
 		pr_err("Session creation failed.\n");
@@ -84,13 +128,16 @@ int bench_synthesize(int argc, const char **argv)
 		err = -ENOMEM;
 		goto err_out;
 	}
-	perf_tool__fill_defaults(&tool);
 
-	err = do_synthesize(session, threads, &target, false);
+	puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+	err = do_run_single_threaded(session, threads, &target, false);
 	if (err)
 		goto err_out;
 
-	err = do_synthesize(session, threads, &target, true);
+	err = do_run_single_threaded(session, threads, &target, true);
 
 err_out:
 	if (threads)
@@ -99,3 +146,117 @@ err_out:
 	perf_session__delete(session);
 	return err;
 }
+
+static int do_run_multi_threaded(struct target *target,
+				unsigned int nr_threads_synthesize)
+{
+	struct timeval start, end, diff;
+	u64 runtime_us;
+	unsigned int i;
+	double time_average, time_stddev, event_average, event_stddev;
+	int err;
+	struct stats time_stats, event_stats;
+	struct perf_session *session;
+
+	init_stats(&time_stats);
+	init_stats(&event_stats);
+	for (i = 0; i < multi_iterations; i++) {
+		session = perf_session__new(NULL, false, NULL);
+		if (!session)
+			return -ENOMEM;
+
+		atomic_set(&event_count, 0);
+		gettimeofday(&start, NULL);
+		err = __machine__synthesize_threads(&session->machines.host,
+						NULL,
+						target, NULL,
+						process_synthesized_event,
+						false,
+						nr_threads_synthesize);
+		if (err) {
+			perf_session__delete(session);
+			return err;
+		}
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &diff);
+		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+		update_stats(&time_stats, runtime_us);
+		update_stats(&event_stats, atomic_read(&event_count));
+		perf_session__delete(session);
+	}
+
+	time_average = avg_stats(&time_stats);
+	time_stddev = stddev_stats(&time_stats);
+	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
+		time_average, time_stddev);
+
+	event_average = avg_stats(&event_stats);
+	event_stddev = stddev_stats(&event_stats);
+	printf("    Average num. events: %.3f (+- %.3f)\n",
+		event_average, event_stddev);
+
+	printf("    Average time per event %.3f usec\n",
+		time_average / event_average);
+	return 0;
+}
+
+static int run_multi_threaded(void)
+{
+	struct target target = {
+		.cpu_list = "0"
+	};
+	unsigned int nr_threads_synthesize;
+	int err;
+
+	if (max_threads == UINT_MAX)
+		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+	puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+	for (nr_threads_synthesize = min_threads;
+	     nr_threads_synthesize <= max_threads;
+	     nr_threads_synthesize++) {
+		if (nr_threads_synthesize == 1)
+			perf_set_singlethreaded();
+		else
+			perf_set_multithreaded();
+
+		printf("  Number of synthesis threads: %u\n",
+			nr_threads_synthesize);
+
+		err = do_run_multi_threaded(&target, nr_threads_synthesize);
+		if (err)
+			return err;
+	}
+	perf_set_singlethreaded();
+	return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+	int err = 0;
+
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * If neither single threaded nor multi-threaded is specified, default
+	 * to running just single threaded.
+	 */
+	if (!run_st && !run_mt)
+		run_st = true;
+
+	if (run_st)
+		err = run_single_threaded();
+
+	if (!err && run_mt)
+		err = run_multi_threaded();
+
+	return err;
+}

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-05-08 13:08 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-15  5:40 [PATCH v5 0/3] perf synthetic events Ian Rogers
2020-04-15  5:40 ` [PATCH v5 1/3] perf bench: add a multi-threaded synthesize benchmark Ian Rogers
2020-04-23 14:17   ` Arnaldo Carvalho de Melo
2020-05-08 13:05   ` [tip: perf/core] perf bench: Add " tip-bot2 for Ian Rogers
2020-04-15  5:40 ` [PATCH v5 2/3] tools api: add a lightweight buffered reading api Ian Rogers
2020-05-08 13:05   ` [tip: perf/core] tools api: Add " tip-bot2 for Ian Rogers
2020-04-15  5:40 ` [PATCH v5 3/3] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
2020-05-08 13:05   ` [tip: perf/core] " tip-bot2 for Ian Rogers
2020-04-16 14:19 ` [PATCH v5 0/3] perf synthetic events Namhyung Kim
2020-04-23 14:24   ` Arnaldo Carvalho de Melo
2020-04-24  7:42     ` Jiri Olsa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.