From: Song Liu <songliubraving@fb.com>
To: <peterz@infradead.org>, <rostedt@goodmis.org>, <mingo@redhat.com>,
<davem@davemloft.net>, <netdev@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, <daniel@iogearbox.net>
Cc: <kernel-team@fb.com>, Song Liu <songliubraving@fb.com>
Subject: [PATCH v5] bcc: Try use new API to create [k,u]probe with perf_event_open
Date: Wed, 6 Dec 2017 14:45:11 -0800 [thread overview]
Message-ID: <20171206224518.3598254-2-songliubraving@fb.com> (raw)
In-Reply-To: <20171206224518.3598254-1-songliubraving@fb.com>
A new kernel API allows creating [k,u]probes with perf_event_open.
This patch tries to use the new API first. If the new API doesn't work,
we fall back to the old API (writing to [k,u]probe_events).
bpf_detach_probe() now looks up the event being removed in
[k,u]probe_events. If the event is not found (as is the case for probes
created with perf_event_open), we skip the cleanup procedure.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
src/cc/libbpf.c | 264 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 196 insertions(+), 68 deletions(-)
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index ef6daf3..1ac685f 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -526,38 +526,113 @@ int bpf_attach_socket(int sock, int prog) {
return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
-static int bpf_attach_tracing_event(int progfd, const char *event_path,
- struct perf_reader *reader, int pid, int cpu, int group_fd) {
- int efd, pfd;
- ssize_t bytes;
- char buf[256];
- struct perf_event_attr attr = {};
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ int fd;
+ int ret;
+ char buf[64];
- snprintf(buf, sizeof(buf), "%s/id", event_path);
- efd = open(buf, O_RDONLY, 0);
- if (efd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+
+ fd = open(buf, O_RDONLY);
+ if (fd < 0)
return -1;
- }
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret < 0 || ret >= sizeof(buf))
+ return -1;
+ ret = (int)strtol(buf, NULL, 10);
+ return errno ? -1 : ret;
+}
- bytes = read(efd, buf, sizeof(buf));
- if (bytes <= 0 || bytes >= sizeof(buf)) {
- fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
- close(efd);
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ int fd;
+ int ret;
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ fd = open(buf, O_RDONLY);
+ if (fd < 0)
+ return -1;
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret < 0 || ret >= sizeof(buf))
+ return -1;
+ if (strlen(buf) < strlen("config:"))
+ return -1;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ return errno ? -1 : ret;
+}
+
+/*
+ * A new kernel API allows creating [k,u]probes with perf_event_open, which
+ * makes it easier to clean up the [k,u]probe. This function tries to
+ * create pfd with the new API.
+ */
+static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
+ int pid, int cpu, int group_fd, char *event_type, int is_return)
+{
+ struct perf_event_attr attr = {};
+ int type = bpf_find_probe_type(event_type);
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+
+ if (type < 0 || is_return_bit < 0)
return -1;
- }
- close(efd);
- buf[bytes] = '\0';
- attr.config = strtol(buf, NULL, 0);
- attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
attr.sample_period = 1;
attr.wakeup_events = 1;
- pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+ attr.probe_offset = offs; /* for kprobe, if name is NULL, this the addr */
+ attr.size = sizeof(attr);
+ attr.type = type;
+ attr.kprobe_func = ptr_to_u64((void *)name); /* also work for uprobe_path */
+ return syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd,
+ PERF_FLAG_FD_CLOEXEC);
+}
+
+static int bpf_attach_tracing_event(int progfd, const char *event_path,
+ struct perf_reader *reader, int pid, int cpu, int group_fd, int pfd) {
+ int efd;
+ ssize_t bytes;
+ char buf[256];
+ struct perf_event_attr attr = {};
+
+ /*
+ * Only look up id and call perf_event_open when
+ * bpf_try_perf_event_open_with_probe() didn't return a valid pfd.
+ */
if (pfd < 0) {
- fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
- return -1;
+ snprintf(buf, sizeof(buf), "%s/id", event_path);
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ return -1;
+ }
+
+ bytes = read(efd, buf, sizeof(buf));
+ if (bytes <= 0 || bytes >= sizeof(buf)) {
+ fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
+ close(efd);
+ return -1;
+ }
+ close(efd);
+ buf[bytes] = '\0';
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
+ return -1;
+ }
}
+
perf_reader_set_fd(reader, pfd);
if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
@@ -585,31 +660,39 @@ void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, con
char event_alias[128];
struct perf_reader *reader = NULL;
static char *event_type = "kprobe";
+ int pfd;
reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
- kfd = open(buf, O_WRONLY | O_APPEND, 0);
- if (kfd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
- goto error;
- }
+ /* try use new API to create kprobe */
+ pfd = bpf_try_perf_event_open_with_probe(fn_name, 0, pid, cpu, group_fd,
+ event_type,
+ attach_type != BPF_PROBE_ENTRY);
- snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
- snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
- event_type, event_alias, fn_name);
- if (write(kfd, buf, strlen(buf)) < 0) {
- if (errno == EINVAL)
- fprintf(stderr, "check dmesg output for possible cause\n");
+ if (pfd < 0) {
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ if (kfd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
+
+ snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+ snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+ event_type, event_alias, fn_name);
+ if (write(kfd, buf, strlen(buf)) < 0) {
+ if (errno == EINVAL)
+ fprintf(stderr, "check dmesg output for possible cause\n");
+ close(kfd);
+ goto error;
+ }
close(kfd);
- goto error;
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
}
- close(kfd);
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
goto error;
return reader;
@@ -691,42 +774,50 @@ void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, con
struct perf_reader *reader = NULL;
static char *event_type = "uprobe";
int res, kfd = -1, ns_fd = -1;
+ int pfd = -1;
reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
- kfd = open(buf, O_WRONLY | O_APPEND, 0);
- if (kfd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
- goto error;
- }
+ /* try use new API to create uprobe */
+ pfd = bpf_try_perf_event_open_with_probe(binary_path, offset, pid, cpu,
+ group_fd, event_type, attach_type != BPF_PROBE_ENTRY);
- res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
- if (res < 0 || res >= sizeof(event_alias)) {
- fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
- goto error;
- }
- res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
- event_type, event_alias, binary_path, offset);
- if (res < 0 || res >= sizeof(buf)) {
- fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
- goto error;
- }
+ if (pfd < 0) {
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ if (kfd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
- ns_fd = enter_mount_ns(pid);
- if (write(kfd, buf, strlen(buf)) < 0) {
- if (errno == EINVAL)
- fprintf(stderr, "check dmesg output for possible cause\n");
- goto error;
+ res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+ if (res < 0 || res >= sizeof(event_alias)) {
+ fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
+ goto error;
+ }
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+ event_type, event_alias, binary_path, offset);
+ if (res < 0 || res >= sizeof(buf)) {
+ fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
+ goto error;
+ }
+
+ ns_fd = enter_mount_ns(pid);
+ if (write(kfd, buf, strlen(buf)) < 0) {
+ if (errno == EINVAL)
+ fprintf(stderr, "check dmesg output for possible cause\n");
+ goto error;
+ }
+ close(kfd);
+ exit_mount_ns(ns_fd);
+ ns_fd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
}
- close(kfd);
- exit_mount_ns(ns_fd);
- ns_fd = -1;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
goto error;
return reader;
@@ -741,8 +832,43 @@ error:
static int bpf_detach_probe(const char *ev_name, const char *event_type)
{
- int kfd, res;
+ int kfd = -1, res;
char buf[PATH_MAX];
+ int found_event = 0;
+ size_t bufsize = 0;
+ char *cptr = NULL;
+ FILE *fp;
+
+ /*
+ * For [k,u]probe created with perf_event_open (on newer kernel), it is
+ * not necessary to clean it up in [k,u]probe_events. We first look up
+ * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
+ * it is safe to skip the cleaning up process (write -:... to the file).
+ */
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ fp = fopen(buf, "r");
+ if (!fp) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
+
+ res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
+ if (res < 0 || res >= sizeof(buf)) {
+ fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
+ goto error;
+ }
+
+ while (getline(&cptr, &bufsize, fp) != -1)
+ if (strstr(cptr, buf) != NULL) {
+ found_event = 1;
+ break;
+ }
+ fclose(fp);
+ fp = NULL;
+
+ if (!found_event)
+ return 0;
+
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
kfd = open(buf, O_WRONLY | O_APPEND, 0);
if (kfd < 0) {
@@ -766,6 +892,8 @@ static int bpf_detach_probe(const char *ev_name, const char *event_type)
error:
if (kfd >= 0)
close(kfd);
+ if (fp)
+ fclose(fp);
return -1;
}
@@ -792,7 +920,7 @@ void * bpf_attach_tracepoint(int progfd, const char *tp_category,
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
tp_category, tp_name);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, -1) < 0)
goto error;
return reader;
--
2.9.5
next prev parent reply other threads:[~2017-12-06 22:47 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
2017-12-06 22:45 ` Song Liu [this message]
2017-12-06 22:45 ` [PATCH v5] perf_event_open.2: add type kprobe and uprobe Song Liu
2017-12-06 22:45 ` [PATCH v5 1/6] perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe Song Liu
2018-02-06 11:52 ` [tip:perf/core] perf/core: Prepare perf_event.h for new types: 'perf_kprobe' and 'perf_uprobe' tip-bot for Song Liu
2017-12-06 22:45 ` [PATCH v5 2/6] perf: copy new perf_event.h to tools/include/uapi Song Liu
2018-02-06 11:52 ` [tip:perf/core] perf/headers: Sync new perf_event.h with the tools/include/uapi version tip-bot for Song Liu
2018-02-13 22:38 ` Alexei Starovoitov
2018-02-14 7:51 ` Ingo Molnar
2017-12-06 22:45 ` [PATCH v5 3/6] perf: implement pmu perf_kprobe Song Liu
2017-12-20 10:03 ` Peter Zijlstra
2017-12-20 10:14 ` Peter Zijlstra
2017-12-20 18:10 ` Song Liu
2017-12-20 21:25 ` Peter Zijlstra
2017-12-20 22:05 ` Song Liu
2018-02-06 11:59 ` [tip:perf/core] perf/core: Implement the 'perf_kprobe' PMU tip-bot for Song Liu
2017-12-06 22:45 ` [PATCH v5 4/6] perf: implement pmu perf_uprobe Song Liu
2018-02-06 11:59 ` [tip:perf/core] perf/core: Implement the 'perf_uprobe' PMU tip-bot for Song Liu
2017-12-06 22:45 ` [PATCH v5 5/6] bpf: add option for bpf_load.c to use perf_kprobe Song Liu
2017-12-06 22:45 ` [PATCH v5 6/6] bpf: add new test test_many_kprobe Song Liu
2017-12-07 10:23 ` [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Philippe Ombredanne
2017-12-08 19:57 ` Daniel Borkmann
2017-12-19 17:24 ` Song Liu
2017-12-19 20:25 ` Peter Zijlstra
2018-04-10 4:45 ` Ravi Bangoria
2018-04-10 4:54 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171206224518.3598254-2-songliubraving@fb.com \
--to=songliubraving@fb.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).