linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 1/2] tools api: add a lightweight buffered reading api
@ 2020-04-11  6:42 Ian Rogers
  2020-04-11  6:42 ` [PATCH v4 2/2] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
  2020-04-13  7:29 ` [PATCH v4 1/2] tools api: add a lightweight buffered reading api Namhyung Kim
  0 siblings, 2 replies; 8+ messages in thread
From: Ian Rogers @ 2020-04-11  6:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

The synthesize benchmark shows the majority of execution time going to
fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
reading library that will be used to replace these calls in a follow-up
CL. Add tests for the library to perf test.

v4 adds the test file missed in v3.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/api/io.h              | 112 ++++++++++++
 tools/perf/tests/Build          |   1 +
 tools/perf/tests/api-io.c       | 304 ++++++++++++++++++++++++++++++++
 tools/perf/tests/builtin-test.c |   4 +
 tools/perf/tests/tests.h        |   1 +
 5 files changed, 422 insertions(+)
 create mode 100644 tools/lib/api/io.h
 create mode 100644 tools/perf/tests/api-io.c

diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000000..b7e55b5f8a4a
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+struct io {
+	/* File descriptor being read/ */
+	int fd;
+	/* Size of the read buffer. */
+	unsigned int buf_len;
+	/* Pointer to storage for buffering read. */
+	char *buf;
+	/* End of the storage. */
+	char *end;
+	/* Currently accessed data pointer. */
+	char *data;
+	/* Set true on when the end of file on read error. */
+	bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+			    char *buf, unsigned int buf_len)
+{
+	io->fd = fd;
+	io->buf_len = buf_len;
+	io->buf = buf;
+	io->end = buf;
+	io->data = buf;
+	io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+	char *ptr = io->data;
+
+	if (io->eof)
+		return -1;
+
+	if (ptr == io->end) {
+		ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+		if (n <= 0) {
+			io->eof = true;
+			return -1;
+		}
+		ptr = &io->buf[0];
+		io->end = &io->buf[n];
+	}
+	io->data = ptr + 1;
+	return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
+ * returns the character after the hexadecimal value which may be -1 for eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+	bool first_read = true;
+
+	*hex = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*hex = (*hex << 4) | (ch - '0');
+		else if (ch >= 'a' && ch <= 'f')
+			*hex = (*hex << 4) | (ch - 'a' + 10);
+		else if (ch >= 'A' && ch <= 'F')
+			*hex = (*hex << 4) | (ch - 'A' + 10);
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+/* Read a positive decimal value with out argument dec. If the first character
+ * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
+ * character after the decimal value which may be -1 for eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+	bool first_read = true;
+
+	*dec = 0;
+	while (true) {
+		int ch = io__get_char(io);
+
+		if (ch < 0)
+			return ch;
+		if (ch >= '0' && ch <= '9')
+			*dec = (*dec * 10) + ch - '0';
+		else if (first_read)
+			return -2;
+		else
+			return ch;
+		first_read = false;
+	}
+}
+
+#endif /* __API_IO__ */
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index b3d1bf13ca07..c75557aeef0e 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -56,6 +56,7 @@ perf-y += mem2node.o
 perf-y += maps.o
 perf-y += time-utils-test.o
 perf-y += genelf.o
+perf-y += api-io.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 000000000000..2ada86ad6084
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected)                             \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %d != %d\n",			\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected)                           \
+do {								\
+	if (val != expected) {					\
+		pr_debug("%s:%d: %lld != %lld\n",		\
+			__FILE__, __LINE__, val, expected);	\
+		ret = -1;					\
+	}							\
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+	ssize_t contents_len = strlen(contents);
+	int fd;
+
+	strcpy(path, TEMPL);
+	fd = mkstemp(path);
+	if (fd < 0) {
+		pr_debug("mkstemp failed");
+		return -1;
+	}
+	if (write(fd, contents, contents_len) < contents_len) {
+		pr_debug("short write");
+		close(fd);
+		unlink(path);
+		return -1;
+	}
+	close(fd);
+	return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+		      size_t buf_size, struct io *io)
+{
+	if (make_test_file(path, contents))
+		return -1;
+
+	io->fd = open(path, O_RDONLY);
+	if (io->fd < 0) {
+		pr_debug("Failed to open '%s'\n", path);
+		unlink(path);
+		return -1;
+	}
+	io->buf = malloc(buf_size);
+	if (io->buf == NULL) {
+		pr_debug("Failed to allocate memory");
+		close(io->fd);
+		unlink(path);
+		return -1;
+	}
+	io__init(io, io->fd, io->buf, buf_size);
+	return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+	free(io->buf);
+	close(io->fd);
+	unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	size_t i;
+
+	if (setup_test(path, test_string, buf_size, &io))
+		return -1;
+
+	for (i = 0; i < strlen(test_string); i++) {
+		ch = io__get_char(&io);
+
+		EXPECT_EQUAL(ch, test_string[i]);
+		EXPECT_EQUAL(io.eof, false);
+	}
+	ch = io__get_char(&io);
+	EXPECT_EQUAL(ch, -1);
+	EXPECT_EQUAL(io.eof, true);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_char(void)
+{
+	int i, ret = 0;
+	size_t j;
+
+	static const char *const test_strings[] = {
+		"12345678abcdef90",
+		"a\nb\nc\nd\n",
+		"\a\b\t\v\f\r",
+	};
+	for (i = 0; i <= 10; i++) {
+		for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+			if (do_test_get_char(test_strings[j], 1 << i))
+				ret = -1;
+		}
+	}
+	return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 hex;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_hex(&io, &hex);
+	EXPECT_EQUAL64(hex, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_hex(void)
+{
+	int ret = 0;
+
+	if (do_test_get_hex("12345678abcdef90",
+				0x12345678abcdef90, -1,
+				0, -1,
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("12345678ABCDEF90;a;b",
+				0x12345678abcdef90, ';',
+				0xa, ';',
+				0xb, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_hex("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+				0xabcdef, 'g',
+				0, -2,
+				0x99, 'i',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+			__u64 val1, int ch1,
+			__u64 val2, int ch2,
+			__u64 val3, int ch3,
+			bool end_eof)
+{
+	char path[PATH_MAX];
+	struct io io;
+	int ch, ret = 0;
+	__u64 dec;
+
+	if (setup_test(path, test_string, 4, &io))
+		return -1;
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val1);
+	EXPECT_EQUAL(ch, ch1);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val2);
+	EXPECT_EQUAL(ch, ch2);
+
+	ch = io__get_dec(&io, &dec);
+	EXPECT_EQUAL64(dec, val3);
+	EXPECT_EQUAL(ch, ch3);
+
+	EXPECT_EQUAL(io.eof, end_eof);
+
+	cleanup_test(path, &io);
+	return ret;
+}
+
+static int test_get_dec(void)
+{
+	int ret = 0;
+
+	if (do_test_get_dec("12345678abcdef90",
+				12345678, 'a',
+				0, -2,
+				0, -2,
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("1\n2\n3\n",
+				1, '\n',
+				2, '\n',
+				3, '\n',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("12345678;1;2",
+				12345678, ';',
+				1, ';',
+				2, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("0x1x2x",
+				0, 'x',
+				1, 'x',
+				2, 'x',
+				false))
+		ret = -1;
+
+	if (do_test_get_dec("x1x",
+				0, -2,
+				1, 'x',
+				0, -1,
+				true))
+		ret = -1;
+
+	if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+				123456789, 'a',
+				0, -2,
+				99, 'c',
+				false))
+		ret = -1;
+
+	return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+		int subtest __maybe_unused)
+{
+	int ret = 0;
+
+	if (test_get_char())
+		ret = TEST_FAIL;
+	if (test_get_hex())
+		ret = TEST_FAIL;
+	if (test_get_dec())
+		ret = TEST_FAIL;
+	return ret;
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b6322eb0f423..3471ec52ea11 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -309,6 +309,10 @@ static struct test generic_tests[] = {
 		.desc = "Test jit_write_elf",
 		.func = test__jit_write_elf,
 	},
+	{
+		.desc = "Test api io",
+		.func = test__api_io,
+	},
 	{
 		.desc = "maps__merge_in",
 		.func = test__maps__merge_in,
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 61a1ab032080..d6d4ac34eeb7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -112,6 +112,7 @@ int test__mem2node(struct test *t, int subtest);
 int test__maps__merge_in(struct test *t, int subtest);
 int test__time_utils(struct test *t, int subtest);
 int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4 2/2] perf synthetic events: Remove use of sscanf from /proc reading
  2020-04-11  6:42 [PATCH v4 1/2] tools api: add a lightweight buffered reading api Ian Rogers
@ 2020-04-11  6:42 ` Ian Rogers
  2020-04-13  7:29 ` [PATCH v4 1/2] tools api: add a lightweight buffered reading api Namhyung Kim
  1 sibling, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2020-04-11  6:42 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Namhyung Kim,
	Petr Mladek, Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner,
	Kan Liang, linux-kernel, linux-perf-users
  Cc: Stephane Eranian, Ian Rogers

The synthesize benchmark, run on a single process and thread, shows
perf_event__synthesize_mmap_events as the hottest function with fgets
and sscanf taking the majority of execution time. fscanf performs
similarly well. Replace the scanf call with manual reading of each field
of the /proc/pid/maps line, and remove some unnecessary buffering.
This change also addresses potential, but unlikely, buffer overruns for
the string values read by scanf.

Performance before is:
Average synthesis took: 120.195100 usec
Average data synthesis took: 156.582300 usec

And after is:
Average synthesis took: 67.189100 usec
Average data synthesis took: 102.451600 usec

On a Intel Xeon 6154 compiling with Debian gcc 9.2.1.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/synthetic-events.c | 157 +++++++++++++++++++----------
 1 file changed, 105 insertions(+), 52 deletions(-)

diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 9d4aa951eaa6..1ea9adaef9c7 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include <api/fs/fs.h>
+#include <api/io.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -273,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+				u32 *prot, u32 *flags, __u64 *offset,
+				u32 *maj, u32 *min,
+				__u64 *inode,
+				ssize_t pathname_size, char *pathname)
+{
+	__u64 temp;
+	int ch;
+	char *start_pathname = pathname;
+
+	if (io__get_hex(io, start) != '-')
+		return false;
+	if (io__get_hex(io, end) != ' ')
+		return false;
+
+	/* map protection and flags bits */
+	*prot = 0;
+	ch = io__get_char(io);
+	if (ch == 'r')
+		*prot |= PROT_READ;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'w')
+		*prot |= PROT_WRITE;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 'x')
+		*prot |= PROT_EXEC;
+	else if (ch != '-')
+		return false;
+	ch = io__get_char(io);
+	if (ch == 's')
+		*flags = MAP_SHARED;
+	else if (ch == 'p')
+		*flags = MAP_PRIVATE;
+	else
+		return false;
+	if (io__get_char(io) != ' ')
+		return false;
+
+	if (io__get_hex(io, offset) != ' ')
+		return false;
+
+	if (io__get_hex(io, &temp) != ':')
+		return false;
+	*maj = temp;
+	if (io__get_hex(io, &temp) != ' ')
+		return false;
+	*min = temp;
+
+	ch = io__get_dec(io, inode);
+	if (ch != ' ') {
+		*pathname = '\0';
+		return ch == '\n';
+	}
+	do {
+		ch = io__get_char(io);
+	} while (ch == ' ');
+	while (true) {
+		if (ch < 0)
+			return false;
+		if (ch == '\0' || ch == '\n' ||
+		    (pathname + 1 - start_pathname) >= pathname_size) {
+			*pathname = '\0';
+			return true;
+		}
+		*pathname++ = ch;
+		ch = io__get_char(io);
+	}
+}
+
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
@@ -280,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       struct machine *machine,
 				       bool mmap_data)
 {
-	FILE *fp;
 	unsigned long long t;
 	char bf[BUFSIZ];
+	struct io io;
 	bool truncation = false;
 	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
@@ -295,28 +369,39 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
 		machine->root_dir, pid, pid);
 
-	fp = fopen(bf, "r");
-	if (fp == NULL) {
+	io.fd = open(bf, O_RDONLY, 0);
+	if (io.fd < 0) {
 		/*
 		 * We raced with a task exiting - just return:
 		 */
 		pr_debug("couldn't open %s\n", bf);
 		return -1;
 	}
+	io__init(&io, io.fd, bf, sizeof(bf));
 
 	event->header.type = PERF_RECORD_MMAP2;
 	t = rdclock();
 
-	while (1) {
-		char prot[5];
-		char execname[PATH_MAX];
-		char anonstr[] = "//anon";
-		unsigned int ino;
+	while (!io.eof) {
+		static const char anonstr[] = "//anon";
 		size_t size;
-		ssize_t n;
 
-		if (fgets(bf, sizeof(bf), fp) == NULL)
-			break;
+		/* ensure null termination since stack will be reused. */
+		event->mmap2.filename[0] = '\0';
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		if (!read_proc_maps_line(&io,
+					&event->mmap2.start,
+					&event->mmap2.len,
+					&event->mmap2.prot,
+					&event->mmap2.flags,
+					&event->mmap2.pgoff,
+					&event->mmap2.maj,
+					&event->mmap2.min,
+					&event->mmap2.ino,
+					sizeof(event->mmap2.filename),
+					event->mmap2.filename))
+			continue;
 
 		if ((rdclock() - t) > timeout) {
 			pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
@@ -327,23 +412,6 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			goto out;
 		}
 
-		/* ensure null termination since stack will be reused. */
-		strcpy(execname, "");
-
-		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
-		       &event->mmap2.start, &event->mmap2.len, prot,
-		       &event->mmap2.pgoff, &event->mmap2.maj,
-		       &event->mmap2.min,
-		       &ino, execname);
-
-		/*
- 		 * Anon maps don't have the execname.
- 		 */
-		if (n < 7)
-			continue;
-
-		event->mmap2.ino = (u64)ino;
 		event->mmap2.ino_generation = 0;
 
 		/*
@@ -354,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		else
 			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
 
-		/* map protection and flags bits */
-		event->mmap2.prot = 0;
-		event->mmap2.flags = 0;
-		if (prot[0] == 'r')
-			event->mmap2.prot |= PROT_READ;
-		if (prot[1] == 'w')
-			event->mmap2.prot |= PROT_WRITE;
-		if (prot[2] == 'x')
-			event->mmap2.prot |= PROT_EXEC;
-
-		if (prot[3] == 's')
-			event->mmap2.flags |= MAP_SHARED;
-		else
-			event->mmap2.flags |= MAP_PRIVATE;
-
-		if (prot[2] != 'x') {
-			if (!mmap_data || prot[0] != 'r')
+		if ((event->mmap2.prot & PROT_EXEC) == 0) {
+			if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
 				continue;
 
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -380,17 +433,17 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (truncation)
 			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
 
-		if (!strcmp(execname, ""))
-			strcpy(execname, anonstr);
+		if (!strcmp(event->mmap2.filename, ""))
+			strcpy(event->mmap2.filename, anonstr);
 
 		if (hugetlbfs_mnt_len &&
-		    !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
-			strcpy(execname, anonstr);
+		    !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+			     hugetlbfs_mnt_len)) {
+			strcpy(event->mmap2.filename, anonstr);
 			event->mmap2.flags |= MAP_HUGETLB;
 		}
 
-		size = strlen(execname) + 1;
-		memcpy(event->mmap2.filename, execname, size);
+		size = strlen(event->mmap2.filename) + 1;
 		size = PERF_ALIGN(size, sizeof(u64));
 		event->mmap2.len -= event->mmap.start;
 		event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -409,7 +462,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			break;
 	}
 
-	fclose(fp);
+	close(io.fd);
 	return rc;
 }
 
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-11  6:42 [PATCH v4 1/2] tools api: add a lightweight buffered reading api Ian Rogers
  2020-04-11  6:42 ` [PATCH v4 2/2] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
@ 2020-04-13  7:29 ` Namhyung Kim
  2020-04-13 16:21   ` Ian Rogers
  1 sibling, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2020-04-13  7:29 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

Hi Ian,

On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
>
> The synthesize benchmark shows the majority of execution time going to
> fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> reading library that will be used to replace these calls in a follow-up
> CL. Add tests for the library to perf test.
>
> v4 adds the test file missed in v3.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> + * returns the character after the hexadecimal value which may be -1 for eof.

I'm not sure returning -1 is good when it actually reads something and
meets EOF.
Although it would have a valid value, users might consider it an error IMHO.
Why not returning 0 instead? (I'm ok with -1 for the later use of the API).


> + * If the read value is larger than a u64 the high-order bits will be dropped.
> + */
> +static inline int io__get_hex(struct io *io, __u64 *hex)
> +{
> +       bool first_read = true;
> +
> +       *hex = 0;
> +       while (true) {
> +               int ch = io__get_char(io);
> +
> +               if (ch < 0)
> +                       return ch;
> +               if (ch >= '0' && ch <= '9')
> +                       *hex = (*hex << 4) | (ch - '0');
> +               else if (ch >= 'a' && ch <= 'f')
> +                       *hex = (*hex << 4) | (ch - 'a' + 10);
> +               else if (ch >= 'A' && ch <= 'F')
> +                       *hex = (*hex << 4) | (ch - 'A' + 10);
> +               else if (first_read)
> +                       return -2;
> +               else
> +                       return ch;
> +               first_read = false;
> +       }
> +}
> +
> +/* Read a positive decimal value with out argument dec. If the first character
> + * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
> + * character after the decimal value which may be -1 for eof. If the read value
> + * is larger than a u64 the high-order bits will be dropped.

Ditto.

Thanks
Namhyung


> + */
> +static inline int io__get_dec(struct io *io, __u64 *dec)
> +{
> +       bool first_read = true;
> +
> +       *dec = 0;
> +       while (true) {
> +               int ch = io__get_char(io);
> +
> +               if (ch < 0)
> +                       return ch;
> +               if (ch >= '0' && ch <= '9')
> +                       *dec = (*dec * 10) + ch - '0';
> +               else if (first_read)
> +                       return -2;
> +               else
> +                       return ch;
> +               first_read = false;
> +       }
> +}
> +
> +#endif /* __API_IO__ */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-13  7:29 ` [PATCH v4 1/2] tools api: add a lightweight buffered reading api Namhyung Kim
@ 2020-04-13 16:21   ` Ian Rogers
  2020-04-14  0:16     ` Namhyung Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Ian Rogers @ 2020-04-13 16:21 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

On Mon, Apr 13, 2020 at 12:29 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hi Ian,
>
> On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
> >
> > The synthesize benchmark shows the majority of execution time going to
> > fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> > reading library that will be used to replace these calls in a follow-up
> > CL. Add tests for the library to perf test.
> >
> > v4 adds the test file missed in v3.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> > + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> > + * returns the character after the hexadecimal value which may be -1 for eof.
>
> I'm not sure returning -1 is good when it actually reads something and
> meets EOF.
> Although it would have a valid value, users might consider it an error IMHO.
> Why not returning 0 instead? (I'm ok with -1 for the later use of the API).

Thanks for the feedback! In the code for /proc/pid/maps this is a
hypothetical, but I think having the API right is important. I didn't
go with 0 as you mention 0 'could' encode a character, for example,
7fffabcd\0 wouldn't be distinguishable from 7fffabcd<EOF>. The updated
code distinguishes the cases as 0 meaning character \0, -1 meaning EOF
and -2 meaning bad encoding. Your worry is that a hex number that's
next to EOF will get a result of -1 showing the EOF came next. and
code that does 'if ( .. < 0)' would trigger. While clunky, it'd be
possible in those cases to change the code to 'if ( .. < -1)'. So my
thoughts are:
1) being able to tell apart the 3 cases could be important - this is
all hypothetical;
2) keeping EOF and error as negative numbers has a degree of consistency;
3) using -1 for EOF comes from get_char, it'd be nice to have one
value mean EOF.
Perhaps the issue is the name of the function? It isn't a standard API
to return the next character, but it simplified things for me as I
didn't need to add a 'rewind' operation. The function names could be
something like io__get_hex_then_char and io__get_dec_then_char, EOF
for the 'then_char' part would be more consistent. I'd tried to keep
the names short and have a load bearing comment, which isn't ideal but
generally I believe the style is that function names are kept short.
Let me know what you think.

Thanks,
Ian

> > + * If the read value is larger than a u64 the high-order bits will be dropped.
> > + */
> > +static inline int io__get_hex(struct io *io, __u64 *hex)
> > +{
> > +       bool first_read = true;
> > +
> > +       *hex = 0;
> > +       while (true) {
> > +               int ch = io__get_char(io);
> > +
> > +               if (ch < 0)
> > +                       return ch;
> > +               if (ch >= '0' && ch <= '9')
> > +                       *hex = (*hex << 4) | (ch - '0');
> > +               else if (ch >= 'a' && ch <= 'f')
> > +                       *hex = (*hex << 4) | (ch - 'a' + 10);
> > +               else if (ch >= 'A' && ch <= 'F')
> > +                       *hex = (*hex << 4) | (ch - 'A' + 10);
> > +               else if (first_read)
> > +                       return -2;
> > +               else
> > +                       return ch;
> > +               first_read = false;
> > +       }
> > +}
> > +
> > +/* Read a positive decimal value with out argument dec. If the first character
> > + * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
> > + * character after the decimal value which may be -1 for eof. If the read value
> > + * is larger than a u64 the high-order bits will be dropped.
>
> Ditto.
>
> Thanks
> Namhyung
>
>
> > + */
> > +static inline int io__get_dec(struct io *io, __u64 *dec)
> > +{
> > +       bool first_read = true;
> > +
> > +       *dec = 0;
> > +       while (true) {
> > +               int ch = io__get_char(io);
> > +
> > +               if (ch < 0)
> > +                       return ch;
> > +               if (ch >= '0' && ch <= '9')
> > +                       *dec = (*dec * 10) + ch - '0';
> > +               else if (first_read)
> > +                       return -2;
> > +               else
> > +                       return ch;
> > +               first_read = false;
> > +       }
> > +}
> > +
> > +#endif /* __API_IO__ */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-13 16:21   ` Ian Rogers
@ 2020-04-14  0:16     ` Namhyung Kim
  2020-04-14  0:48       ` Ian Rogers
  0 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2020-04-14  0:16 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

On Tue, Apr 14, 2020 at 1:22 AM Ian Rogers <irogers@google.com> wrote:
>
> On Mon, Apr 13, 2020 at 12:29 AM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > Hi Ian,
> >
> > On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
> > >
> > > The synthesize benchmark shows the majority of execution time going to
> > > fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> > > reading library that will be used to replace these calls in a follow-up
> > > CL. Add tests for the library to perf test.
> > >
> > > v4 adds the test file missed in v3.
> > >
> > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > ---
> > > +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> > > + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> > > + * returns the character after the hexadecimal value which may be -1 for eof.
> >
> > I'm not sure returning -1 is good when it actually reads something and
> > meets EOF.
> > Although it would have a valid value, users might consider it an error IMHO.
> > Why not returning 0 instead? (I'm ok with -1 for the later use of the API).
>
> Thanks for the feedback! In the code for /proc/pid/maps this is a
> hypothetical, but I think having the API right is important. I didn't
> go with 0 as you mention 0 'could' encode a character, for example,
> 7fffabcd\0 wouldn't be distinguishable from 7fffabcd<EOF>.

Practically I don't think it matters in this case as long as we can
distinguish them in the next call (if the user wants to do).
What users want to do (I think) is whether the returned value
(in *hex) is ok to use or not.  By returning -1 on EOF, it might
be confusing for users..


> The updated
> code distinguishes the cases as 0 meaning character \0, -1 meaning EOF
> and -2 meaning bad encoding. Your worry is that a hex number that's
> next to EOF will get a result of -1 showing the EOF came next. and
> code that does 'if ( .. < 0)' would trigger. While clunky, it'd be
> possible in those cases to change the code to 'if ( .. < -1)'.

Yes, but it's not conventional IMHO.


> So my thoughts are:
> 1) being able to tell apart the 3 cases could be important - this is
> all hypothetical;
> 2) keeping EOF and error as negative numbers has a degree of consistency;
> 3) using -1 for EOF comes from get_char, it'd be nice to have one
> value mean EOF.
> Perhaps the issue is the name of the function? It isn't a standard API
> to return the next character, but it simplified things for me as I
> didn't need to add a 'rewind' operation. The function names could be
> something like io__get_hex_then_char and io__get_dec_then_char, EOF
> for the 'then_char' part would be more consistent. I'd tried to keep
> the names short and have a load bearing comment, which isn't ideal but
> generally I believe the style is that function names are kept short.
> Let me know what you think.

I'm ok with the function name and understand your concerns.
And I don't want to insist it strongly but just sharing my thoughts.

Thanks
Namhyung

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-14  0:16     ` Namhyung Kim
@ 2020-04-14  0:48       ` Ian Rogers
  2020-04-15  2:20         ` Namhyung Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Ian Rogers @ 2020-04-14  0:48 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

On Mon, Apr 13, 2020 at 5:16 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Tue, Apr 14, 2020 at 1:22 AM Ian Rogers <irogers@google.com> wrote:
> >
> > On Mon, Apr 13, 2020 at 12:29 AM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > Hi Ian,
> > >
> > > On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
> > > >
> > > > The synthesize benchmark shows the majority of execution time going to
> > > > fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> > > > reading library that will be used to replace these calls in a follow-up
> > > > CL. Add tests for the library to perf test.
> > > >
> > > > v4 adds the test file missed in v3.
> > > >
> > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > ---
> > > > +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> > > > + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> > > > + * returns the character after the hexadecimal value which may be -1 for eof.
> > >
> > > I'm not sure returning -1 is good when it actually reads something and
> > > meets EOF.
> > > Although it would have a valid value, users might consider it an error IMHO.
> > > Why not returning 0 instead? (I'm ok with -1 for the later use of the API).
> >
> > Thanks for the feedback! In the code for /proc/pid/maps this is a
> > hypothetical, but I think having the API right is important. I didn't
> > go with 0 as you mention 0 'could' encode a character, for example,
> > 7fffabcd\0 wouldn't be distinguishable from 7fffabcd<EOF>.
>
> Practically I don't think it matters in this case as long as we can
> distinguish them in the next call (if the user wants to do).
> What users want to do (I think) is whether the returned value
> (in *hex) is ok to use or not.  By returning -1 on EOF, it might
> be confusing for users..

In the /proc/pid/maps case the code for reading an address like
"00400000-00452000 " the code is:

if (io__get_hex(io, start) != '-')
  return false;
if (io__get_hex(io, end) != ' ')
  return false;

If io__get_hex doesn't return the next character it becomes:

if (io__get_hex(io, start))
  return false;
if (io__get_char(io) != '-')
  return false;
if (io__get_hex(io, end))
  return false;
if (io__get_char(io) != ' ')
  return false;

Which is twice as verbose and requires that io have a rewind operation
to go backward when io__get_hex and io__get_dec have gone 1 character
too far.

> > The updated
> > code distinguishes the cases as 0 meaning character \0, -1 meaning EOF
> > and -2 meaning bad encoding. Your worry is that a hex number that's
> > next to EOF will get a result of -1 showing the EOF came next. and
> > code that does 'if ( .. < 0)' would trigger. While clunky, it'd be
> > possible in those cases to change the code to 'if ( .. < -1)'.
>
> Yes, but it's not conventional IMHO.
>
>
> > So my thoughts are:
> > 1) being able to tell apart the 3 cases could be important - this is
> > all hypothetical;
> > 2) keeping EOF and error as negative numbers has a degree of consistency;
> > 3) using -1 for EOF comes from get_char, it'd be nice to have one
> > value mean EOF.
> > Perhaps the issue is the name of the function? It isn't a standard API
> > to return the next character, but it simplified things for me as I
> > didn't need to add a 'rewind' operation. The function names could be
> > something like io__get_hex_then_char and io__get_dec_then_char, EOF
> > for the 'then_char' part would be more consistent. I'd tried to keep
> > the names short and have a load bearing comment, which isn't ideal but
> > generally I believe the style is that function names are kept short.
> > Let me know what you think.
>
> I'm ok with the function name and understand your concerns.
> And I don't want to insist it strongly but just sharing my thoughts.
>
> Thanks
> Namhyung

Thanks, feedback appreciated! It is useful to discuss and it is
straightforward to change the API but I'm in two minds as to whether
it would be better.

I'd still like to land this and the next patch, as getting rid of
fgets/sscanf saves 50us from event synthesis. Breaking out the io part
of that change wasn't done so much with a view to replacing stdio, but
just something minimal that serves the /proc/pid/maps case.

Thanks,
Ian

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-14  0:48       ` Ian Rogers
@ 2020-04-15  2:20         ` Namhyung Kim
  2020-04-15  2:26           ` Ian Rogers
  0 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2020-04-15  2:20 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

On Tue, Apr 14, 2020 at 9:48 AM Ian Rogers <irogers@google.com> wrote:
>
> On Mon, Apr 13, 2020 at 5:16 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Tue, Apr 14, 2020 at 1:22 AM Ian Rogers <irogers@google.com> wrote:
> > >
> > > On Mon, Apr 13, 2020 at 12:29 AM Namhyung Kim <namhyung@kernel.org> wrote:
> > > >
> > > > Hi Ian,
> > > >
> > > > On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
> > > > >
> > > > > The synthesize benchmark shows the majority of execution time going to
> > > > > fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> > > > > reading library that will be used to replace these calls in a follow-up
> > > > > CL. Add tests for the library to perf test.
> > > > >
> > > > > v4 adds the test file missed in v3.
> > > > >
> > > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > > ---
> > > > > +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> > > > > + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> > > > > + * returns the character after the hexadecimal value which may be -1 for eof.
> > > >
> > > > I'm not sure returning -1 is good when it actually reads something and
> > > > meets EOF.
> > > > Although it would have a valid value, users might consider it an error IMHO.
> > > > Why not returning 0 instead? (I'm ok with -1 for the later use of the API).
> > >
> > > Thanks for the feedback! In the code for /proc/pid/maps this is a
> > > hypothetical, but I think having the API right is important. I didn't
> > > go with 0 as you mention 0 'could' encode a character, for example,
> > > 7fffabcd\0 wouldn't be distinguishable from 7fffabcd<EOF>.
> >
> > Practically I don't think it matters in this case as long as we can
> > distinguish them in the next call (if the user wants to do).
> > What users want to do (I think) is whether the returned value
> > (in *hex) is ok to use or not.  By returning -1 on EOF, it might
> > be confusing for users..
>
> In the /proc/pid/maps case the code for reading an address like
> "00400000-00452000 " the code is:
>
> if (io__get_hex(io, start) != '-')
>   return false;
> if (io__get_hex(io, end) != ' ')
>   return false;
>
> If io__get_hex doesn't return the next character it becomes:
>
> if (io__get_hex(io, start))
>   return false;
> if (io__get_char(io) != '-')
>   return false;
> if (io__get_hex(io, end))
>   return false;
> if (io__get_char(io) != ' ')
>   return false;
>
> Which is twice as verbose and requires that io have a rewind operation
> to go backward when io__get_hex and io__get_dec have gone 1 character
> too far.

Yeah, I'm not against returning the next character - it's good.
The only concern was whether it should return -1 or 0 when
it meets EOF after parsing some digits.

But I think we can go with this version as there's no such case
when parsing /proc/pid/maps.

>
> > > The updated
> > > code distinguishes the cases as 0 meaning character \0, -1 meaning EOF
> > > and -2 meaning bad encoding. Your worry is that a hex number that's
> > > next to EOF will get a result of -1 showing the EOF came next. and
> > > code that does 'if ( .. < 0)' would trigger. While clunky, it'd be
> > > possible in those cases to change the code to 'if ( .. < -1)'.
> >
> > Yes, but it's not conventional IMHO.
> >
> >
> > > So my thoughts are:
> > > 1) being able to tell apart the 3 cases could be important - this is
> > > all hypothetical;
> > > 2) keeping EOF and error as negative numbers has a degree of consistency;
> > > 3) using -1 for EOF comes from get_char, it'd be nice to have one
> > > value mean EOF.
> > > Perhaps the issue is the name of the function? It isn't a standard API
> > > to return the next character, but it simplified things for me as I
> > > didn't need to add a 'rewind' operation. The function names could be
> > > something like io__get_hex_then_char and io__get_dec_then_char, EOF
> > > for the 'then_char' part would be more consistent. I'd tried to keep
> > > the names short and have a load bearing comment, which isn't ideal but
> > > generally I believe the style is that function names are kept short.
> > > Let me know what you think.
> >
> > I'm ok with the function name and understand your concerns.
> > And I don't want to insist it strongly but just sharing my thoughts.
> >
> > Thanks
> > Namhyung
>
> Thanks, feedback appreciated! It is useful to discuss and it is
> straightforward to change the API but I'm in two minds as to whether
> it would be better.
>
> I'd still like to land this and the next patch, as getting rid of
> fgets/sscanf saves 50us from event synthesis. Breaking out the io part
> of that change wasn't done so much with a view to replacing stdio, but
> just something minimal that serves the /proc/pid/maps case.

The performance gain looks nice!  Thanks for working on this.

Thanks
Namhyung

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v4 1/2] tools api: add a lightweight buffered reading api
  2020-04-15  2:20         ` Namhyung Kim
@ 2020-04-15  2:26           ` Ian Rogers
  0 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2020-04-15  2:26 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Petr Mladek,
	Andrey Zhizhikin, Kefeng Wang, Thomas Gleixner, Kan Liang,
	linux-kernel, linux-perf-users, Stephane Eranian

On Tue, Apr 14, 2020 at 7:20 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Tue, Apr 14, 2020 at 9:48 AM Ian Rogers <irogers@google.com> wrote:
> >
> > On Mon, Apr 13, 2020 at 5:16 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > On Tue, Apr 14, 2020 at 1:22 AM Ian Rogers <irogers@google.com> wrote:
> > > >
> > > > On Mon, Apr 13, 2020 at 12:29 AM Namhyung Kim <namhyung@kernel.org> wrote:
> > > > >
> > > > > Hi Ian,
> > > > >
> > > > > On Sat, Apr 11, 2020 at 3:42 PM Ian Rogers <irogers@google.com> wrote:
> > > > > >
> > > > > > The synthesize benchmark shows the majority of execution time going to
> > > > > > fgets and sscanf, necessary to parse /proc/pid/maps. Add a new buffered
> > > > > > reading library that will be used to replace these calls in a follow-up
> > > > > > CL. Add tests for the library to perf test.
> > > > > >
> > > > > > v4 adds the test file missed in v3.
> > > > > >
> > > > > > Signed-off-by: Ian Rogers <irogers@google.com>
> > > > > > ---
> > > > > > +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
> > > > > > + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
> > > > > > + * returns the character after the hexadecimal value which may be -1 for eof.
> > > > >
> > > > > I'm not sure returning -1 is good when it actually reads something and
> > > > > meets EOF.
> > > > > Although it would have a valid value, users might consider it an error IMHO.
> > > > > Why not returning 0 instead? (I'm ok with -1 for the later use of the API).
> > > >
> > > > Thanks for the feedback! In the code for /proc/pid/maps this is a
> > > > hypothetical, but I think having the API right is important. I didn't
> > > > go with 0 as you mention 0 'could' encode a character, for example,
> > > > 7fffabcd\0 wouldn't be distinguishable from 7fffabcd<EOF>.
> > >
> > > Practically I don't think it matters in this case as long as we can
> > > distinguish them in the next call (if the user wants to do).
> > > What users want to do (I think) is whether the returned value
> > > (in *hex) is ok to use or not.  By returning -1 on EOF, it might
> > > be confusing for users..
> >
> > In the /proc/pid/maps case the code for reading an address like
> > "00400000-00452000 " the code is:
> >
> > if (io__get_hex(io, start) != '-')
> >   return false;
> > if (io__get_hex(io, end) != ' ')
> >   return false;
> >
> > If io__get_hex doesn't return the next character it becomes:
> >
> > if (io__get_hex(io, start))
> >   return false;
> > if (io__get_char(io) != '-')
> >   return false;
> > if (io__get_hex(io, end))
> >   return false;
> > if (io__get_char(io) != ' ')
> >   return false;
> >
> > Which is twice as verbose and requires that io have a rewind operation
> > to go backward when io__get_hex and io__get_dec have gone 1 character
> > too far.
>
> Yeah, I'm not against returning the next character - it's good.
> The only concern was whether it should return -1 or 0 when
> it meets EOF after parsing some digits.
>
> But I think we can go with this version as there's no such case
> when parsing /proc/pid/maps.
>
> >
> > > > The updated
> > > > code distinguishes the cases as 0 meaning character \0, -1 meaning EOF
> > > > and -2 meaning bad encoding. Your worry is that a hex number that's
> > > > next to EOF will get a result of -1 showing the EOF came next. and
> > > > code that does 'if ( .. < 0)' would trigger. While clunky, it'd be
> > > > possible in those cases to change the code to 'if ( .. < -1)'.
> > >
> > > Yes, but it's not conventional IMHO.
> > >
> > >
> > > > So my thoughts are:
> > > > 1) being able to tell apart the 3 cases could be important - this is
> > > > all hypothetical;
> > > > 2) keeping EOF and error as negative numbers has a degree of consistency;
> > > > 3) using -1 for EOF comes from get_char, it'd be nice to have one
> > > > value mean EOF.
> > > > Perhaps the issue is the name of the function? It isn't a standard API
> > > > to return the next character, but it simplified things for me as I
> > > > didn't need to add a 'rewind' operation. The function names could be
> > > > something like io__get_hex_then_char and io__get_dec_then_char, EOF
> > > > for the 'then_char' part would be more consistent. I'd tried to keep
> > > > the names short and have a load bearing comment, which isn't ideal but
> > > > generally I believe the style is that function names are kept short.
> > > > Let me know what you think.
> > >
> > > I'm ok with the function name and understand your concerns.
> > > And I don't want to insist it strongly but just sharing my thoughts.
> > >
> > > Thanks
> > > Namhyung
> >
> > Thanks, feedback appreciated! It is useful to discuss and it is
> > straightforward to change the API but I'm in two minds as to whether
> > it would be better.
> >
> > I'd still like to land this and the next patch, as getting rid of
> > fgets/sscanf saves 50us from event synthesis. Breaking out the io part
> > of that change wasn't done so much with a view to replacing stdio, but
> > just something minimal that serves the /proc/pid/maps case.
>
> The performance gain looks nice!  Thanks for working on this.

Thanks for the feedback! I think we can keep an eye on the API, the
idea to change to 0 isn't an unreasonable one. I have some other
multithreaded synthesis improving patches to send from Stephane, and
I've updated the benchmark to measure these. I'll re-send these
patches with the new single and multi-threaded benchmark data.

Thanks,
Ian

> Thanks
> Namhyung

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-04-15  2:27 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-11  6:42 [PATCH v4 1/2] tools api: add a lightweight buffered reading api Ian Rogers
2020-04-11  6:42 ` [PATCH v4 2/2] perf synthetic events: Remove use of sscanf from /proc reading Ian Rogers
2020-04-13  7:29 ` [PATCH v4 1/2] tools api: add a lightweight buffered reading api Namhyung Kim
2020-04-13 16:21   ` Ian Rogers
2020-04-14  0:16     ` Namhyung Kim
2020-04-14  0:48       ` Ian Rogers
2020-04-15  2:20         ` Namhyung Kim
2020-04-15  2:26           ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).