linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: "Jiri Olsa" <jolsa@kernel.org>,
	"Namhyung Kim" <namhyung@kernel.org>,
	"Clark Williams" <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	"Arnaldo Carvalho de Melo" <acme@redhat.com>,
	"Adrian Hunter" <adrian.hunter@intel.com>,
	"Luis Cláudio Gonçalves" <lclaudio@redhat.com>
Subject: [PATCH 33/37] perf trace: Reuse BPF augmenters from syscalls with similar args signature
Date: Mon, 22 Jul 2019 14:38:35 -0300	[thread overview]
Message-ID: <20190722173839.22898-34-acme@kernel.org> (raw)
In-Reply-To: <20190722173839.22898-1-acme@kernel.org>

From: Arnaldo Carvalho de Melo <acme@redhat.com>

We have an augmenter for the "open" syscall, which has just one pointer,
in the first argument, a "const char *", so any other syscall that has
just one pointer and that is the first can reuse the "open" BPF
augmenter program.

Even more, syscalls that get two pointers with the first being a string
can reuse "open"'s BPF augmenter till we have an augmenter that better
matches that syscall with two pointers.

With this the few augmenters we have, for open (first arg is a string),
openat (2nd arg is a string), renameat (2nd and 4th are strings) can be
reused by a lot of syscalls, ditto for "bind" reusing "connect" because
both have the 2nd argument as a sockaddr and the 3rd as its len.

Lets see how this makes the "bind" syscall reuse the "connect" BPF prog
augmenter found in tools/perf/examples/bpf/augmented_raw_syscalls.c:

  # perf trace -e bind,connect systemctl restart sshd
  connect(3, { .family: PF_LOCAL, path: /run/systemd/private }, 23) = 0
  #

Oh, it just connects to some daemon, so we better do it system wide and then
stop/start sshd:

  # perf trace -e bind,connect
  systemctl/10124 connect(3, { .family: PF_LOCAL, path: /run/systemd/private }, 23) = 0
  sshd/10102 connect(7, { .family: PF_LOCAL, path: /dev/log }, 110) = 0
  systemctl/10126 connect(3, { .family: PF_LOCAL, path: /run/systemd/private }, 23) = 0
  systemd/10128  ... [continued]: connect())            = 0
  (sshd)/10128 connect(3, { .family: PF_LOCAL, path: /run/systemd/journal/stdout }, 30) ...
  sshd/10128 bind(3, { .family: PF_NETLINK }, 12)    = 0
  sshd/10128 connect(4, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory)
  sshd/10128 connect(3, { .family: PF_INET6, port: 22, addr: :: }, 28) = 0
  sshd/10128 connect(3, { .family: PF_UNSPEC }, 16)  = 0
  sshd/10128 connect(3, { .family: PF_INET, port: 22, addr: 0.0.0.0 }, 16) = 0
  sshd/10128 connect(3, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory)
  sshd/10128 connect(3, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory)
  sshd/10128 connect(5, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory)
  sshd/10128 connect(5, { .family: PF_LOCAL, path: /var/run/nscd/socket }, 110) = -1 ENOENT (No such file or directory)
  sshd/10128 bind(4, { .family: PF_INET, port: 22, addr: 0.0.0.0 }, 16) = 0
  sshd/10128 connect(6, { .family: PF_LOCAL, path: /dev/log }, 110) = 0
  sshd/10128 bind(6, { .family: PF_INET6, port: 22, addr: :: }, 28) = 0
  sshd/10128 connect(7, { .family: PF_LOCAL, path: /dev/log }, 110) = 0
  ^C#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Luis Cláudio Gonçalves <lclaudio@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-zfley2ghs4nim1uq4nu6ed3l@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 154 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 152 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d8565c9a18a2..200fbe33d5de 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -709,7 +709,6 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ },
 		   [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
 	{ .name	    = "bind",
-	  .bpf_prog_name = { .sys_enter = "!syscalls:sys_enter_connect", },
 	  .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
 		   [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
 		   [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
@@ -879,7 +878,6 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
 		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, },
 	{ .name	    = "renameat2",
-	  .bpf_prog_name = { .sys_enter = "!syscalls:sys_enter_renameat", },
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
 		   [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
 		   [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
@@ -2910,6 +2908,94 @@ static int trace__init_syscalls_bpf_map(struct trace *trace)
 	return __trace__init_syscalls_bpf_map(trace, enabled);
 }
 
+static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
+{
+	struct tep_format_field *field, *candidate_field;
+	int id;
+
+	/*
+	 * We're only interested in syscalls that have a pointer:
+	 */
+	for (field = sc->args; field; field = field->next) {
+		if (field->flags & TEP_FIELD_IS_POINTER)
+			goto try_to_find_pair;
+	}
+
+	return NULL;
+
+try_to_find_pair:
+	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
+		struct syscall *pair = trace__syscall_info(trace, NULL, id);
+		struct bpf_program *pair_prog;
+		bool is_candidate = false;
+
+		if (pair == NULL || pair == sc ||
+		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+			continue;
+
+		for (field = sc->args, candidate_field = pair->args;
+		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
+			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
+			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
+
+			if (is_pointer) {
+			       if (!candidate_is_pointer) {
+					// The candidate just doesn't copies our pointer arg, might copy other pointers we want.
+					continue;
+			       }
+			} else {
+				if (candidate_is_pointer) {
+					// The candidate might copy a pointer we don't have, skip it.
+					goto next_candidate;
+				}
+				continue;
+			}
+
+			if (strcmp(field->type, candidate_field->type))
+				goto next_candidate;
+
+			is_candidate = true;
+		}
+
+		if (!is_candidate)
+			goto next_candidate;
+
+		/*
+		 * Check if the tentative pair syscall augmenter has more pointers, if it has,
+		 * then it may be collecting that and we then can't use it, as it would collect
+		 * more than what is common to the two syscalls.
+		 */
+		if (candidate_field) {
+			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
+				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
+					goto next_candidate;
+		}
+
+		pair_prog = pair->bpf_prog.sys_enter;
+		/*
+		 * If the pair isn't enabled, then its bpf_prog.sys_enter will not
+		 * have been searched for, so search it here and if it returns the
+		 * unaugmented one, then ignore it, otherwise we'll reuse that BPF
+		 * program for a filtered syscall on a non-filtered one.
+		 *
+		 * For instance, we have "!syscalls:sys_enter_renameat" and that is
+		 * useful for "renameat2".
+		 */
+		if (pair_prog == NULL) {
+			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
+			if (pair_prog == trace->syscalls.unaugmented_prog)
+				goto next_candidate;
+		}
+
+		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+		return pair_prog;
+	next_candidate:
+		continue;
+	}
+
+	return NULL;
+}
+
 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
 {
 	int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
@@ -2935,6 +3021,70 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
 			break;
 	}
 
+	/*
+	 * Now lets do a second pass looking for enabled syscalls without
+	 * an augmenter that have a signature that is a superset of another
+	 * syscall with an augmenter so that we can auto-reuse it.
+	 *
+	 * I.e. if we have an augmenter for the "open" syscall that has
+	 * this signature:
+	 *
+	 *   int open(const char *pathname, int flags, mode_t mode);
+	 *
+	 * I.e. that will collect just the first string argument, then we
+	 * can reuse it for the 'creat' syscall, that has this signature:
+	 *
+	 *   int creat(const char *pathname, mode_t mode);
+	 *
+	 * and for:
+	 *
+	 *   int stat(const char *pathname, struct stat *statbuf);
+	 *   int lstat(const char *pathname, struct stat *statbuf);
+	 *
+	 * Because the 'open' augmenter will collect the first arg as a string,
+	 * and leave alone all the other args, which already helps with
+	 * beautifying 'stat' and 'lstat''s pathname arg.
+	 *
+	 * Then, in time, when 'stat' gets an augmenter that collects both
+	 * first and second arg (this one on the raw_syscalls:sys_exit prog
+	 * array tail call, then that one will be used.
+	 */
+	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+		struct syscall *sc = trace__syscall_info(trace, NULL, key);
+		struct bpf_program *pair_prog;
+		int prog_fd;
+
+		if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
+			continue;
+
+		/*
+		 * For now we're just reusing the sys_enter prog, and if it
+		 * already has an augmenter, we don't need to find one.
+		 */
+		if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+			continue;
+
+		/*
+		 * Look at all the other syscalls for one that has a signature
+		 * that is close enough that we can share:
+		 */
+		pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
+		if (pair_prog == NULL)
+			continue;
+
+		sc->bpf_prog.sys_enter = pair_prog;
+
+		/*
+		 * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter
+		 * with the fd for the program we're reusing:
+		 */
+		prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
+		err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
+		if (err)
+			break;
+	}
+
+
 	return err;
 }
 #else
-- 
2.21.0


  parent reply	other threads:[~2019-07-22 17:42 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-22 17:38 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 01/37] perf include bpf: Add bpf_tail_call() prototype Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 02/37] perf bpf: Do not attach a BPF prog to a tracepoint if its name starts with ! Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 03/37] perf evsel: Store backpointer to attached bpf_object Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 04/37] perf trace: Add pointer to BPF object containing __augmented_syscalls__ Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 05/37] perf trace: Look up maps just on the __augmented_syscalls__ BPF object Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 06/37] perf trace: Order -e syscalls table Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 07/37] perf trace: Add BPF handler for unaugmented syscalls Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 08/37] perf trace: Allow specifying the bpf prog to augment specific syscalls Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 09/37] perf trace: Put the per-syscall entry/exit prog_array BPF map infrastructure in place Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 10/37] perf trace: Handle raw_syscalls:sys_enter just like the BPF_OUTPUT augmented event Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 11/37] perf augmented_raw_syscalls: Add handler for "openat" Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 12/37] perf augmented_raw_syscalls: Switch to using BPF_MAP_TYPE_PROG_ARRAY Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 13/37] perf augmented_raw_syscalls: Support copying two string syscall args Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 14/37] perf trace: Look for default name for entries in the syscalls prog array Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 15/37] perf augmented_raw_syscalls: Rename augmented_args_filename to augmented_args_payload Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 16/37] perf script: Fix --max-blocks man page description Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 17/37] perf script: Improve man page description of metrics Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 18/37] perf script: Fix off by one in brstackinsn IPC computation Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 19/37] perf tools: Fix proper buffer size for feature processing Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 20/37] perf stat: Fix segfault for event group in repeat mode Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 21/37] perf augmented_raw_syscalls: Augment sockaddr arg in 'connect' Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 22/37] perf trace beauty: Make connect's addrlen be printed as an int, not hex Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 23/37] perf trace beauty: Disable fd->pathname when close() not enabled Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 24/37] perf trace beauty: Do not try to use the fd->pathname beautifier for bind/connect fd arg Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 25/37] perf trace beauty: Beautify 'sendto's sockaddr arg Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 26/37] perf trace beauty: Beautify bind's " Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 27/37] perf stat: Always separate stalled cycles per insn Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 28/37] perf session: Fix loading of compressed data split across adjacent records Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 29/37] perf trace beauty: Add BPF augmenter for the 'rename' syscall Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 30/37] perf trace: Forward error codes when trying to read syscall info Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 31/37] perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 32/37] perf trace: Preallocate the syscall table Arnaldo Carvalho de Melo
2019-07-22 17:38 ` Arnaldo Carvalho de Melo [this message]
2019-07-22 17:38 ` [PATCH 34/37] perf trace: Add "sendfile64" alias to the "sendfile" syscall Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 35/37] perf probe: Set pev->nargs to zero after freeing pev->args entries Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 36/37] perf probe: Avoid calling freeing routine multiple times for same pointer Arnaldo Carvalho de Melo
2019-07-22 17:38 ` [PATCH 37/37] perf build: Do not use -Wshadow on gcc < 4.8 Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190722173839.22898-34-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=jolsa@kernel.org \
    --cc=lclaudio@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).