* [PATCH bpf-next] selftests/bpf: Add BPF trampoline performance test
@ 2019-11-22  1:15 Alexei Starovoitov
  2019-11-22 17:37 ` John Fastabend
  2019-11-22 23:47 ` Daniel Borkmann
  0 siblings, 2 replies; 3+ messages in thread
From: Alexei Starovoitov @ 2019-11-22  1:15 UTC
  To: davem; +Cc: daniel, netdev, bpf, kernel-team

Add a test that benchmarks different ways of attaching a BPF program to a
kernel function. Here are the results for a 2.4GHz x86 CPU on a kernel
without mitigations:
$ ./test_progs -n 49 -v|grep events
task_rename base	2743K events per sec
task_rename kprobe	2419K events per sec
task_rename kretprobe	1876K events per sec
task_rename raw_tp	2578K events per sec
task_rename fentry	2710K events per sec
task_rename fexit	2685K events per sec

On a kernel with retpoline:
$ ./test_progs -n 49 -v|grep events
task_rename base	2401K events per sec
task_rename kprobe	1930K events per sec
task_rename kretprobe	1485K events per sec
task_rename raw_tp	2053K events per sec
task_rename fentry	2351K events per sec
task_rename fexit	2185K events per sec

All 5 approaches:
- kprobe/kretprobe in __set_task_comm()
- raw tracepoint in trace_task_rename()
- fentry/fexit in __set_task_comm()
are roughly equivalent.

__set_task_comm() by itself is quite fast, so any extra instructions add up.
Until the BPF trampoline was introduced, the fastest mechanism was the raw
tracepoint; kprobe via ftrace was second best. kretprobe is slow due to the
trap. The new fentry/fexit methods via BPF trampoline are clearly the fastest,
and the difference is more pronounced with retpoline on, since the BPF
trampoline doesn't use indirect jumps.
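
Converted to per-event cost (1e9 divided by the rate, minus the same for the
base run), the no-mitigations numbers work out to roughly 4 ns of extra
overhead per call for fentry, 8 ns for fexit, 23 ns for raw_tp, 49 ns for
kprobe and 169 ns for kretprobe.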

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/test_overhead.c  | 142 ++++++++++++++++++
 .../selftests/bpf/progs/test_overhead.c       |  43 ++++++
 2 files changed, 185 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_overhead.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_overhead.c
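
For reviewers who want to reproduce just the baseline number without any of
the BPF pieces, here is a minimal stand-alone sketch of the same trigger loop
that test_task_rename() below uses (file name and build line are illustrative,
not part of this patch):

/* rename_bench.c - stand-alone version of the task_rename "base" run.
 * Build: cc -O2 -o rename_bench rename_bench.c
 */
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define MAX_CNT 100000

int main(void)
{
	char buf[] = "test\n";
	struct timespec t0, t1;
	long long ns;
	int i, fd;

	/* every write to /proc/self/comm goes through __set_task_comm() */
	fd = open("/proc/self/comm", O_WRONLY | O_TRUNC);
	if (fd < 0) {
		perror("open /proc/self/comm");
		return 1;
	}
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < MAX_CNT; i++) {
		if (write(fd, buf, sizeof(buf)) < 0) {
			perror("write");
			close(fd);
			return 1;
		}
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);
	ns = (t1.tv_sec - t0.tv_sec) * 1000000000ll +
	     (t1.tv_nsec - t0.tv_nsec);
	/* MAX_CNT events over ns nanoseconds -> K events per second */
	printf("task_rename base\t%lldK events per sec\n",
	       MAX_CNT * 1000000ll / ns);
	close(fd);
	return 0;
}

Pinning the task to a single CPU, as setaffinity() does in the test proper,
reduces run-to-run variance.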

diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
new file mode 100644
index 000000000000..c32aa28bd93f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+
+#define MAX_CNT 100000
+
+static __u64 time_get_ns(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static int test_task_rename(const char *prog)
+{
+	int i, fd, duration = 0, err;
+	char buf[] = "test\n";
+	__u64 start_time;
+
+	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (CHECK(fd < 0, "open /proc", "err %d\n", errno))
+		return -1;
+	start_time = time_get_ns();
+	for (i = 0; i < MAX_CNT; i++) {
+		err = write(fd, buf, sizeof(buf));
+		if (err < 0) {
+			CHECK(err < 0, "task rename", "err %d\n", errno);
+			close(fd);
+			return -1;
+		}
+	}
+	printf("task_rename %s\t%lluK events per sec\n", prog,
+	       MAX_CNT * 1000000ll / (time_get_ns() - start_time));
+	close(fd);
+	return 0;
+}
+
+static void test_run(const char *prog)
+{
+	test_task_rename(prog);
+}
+
+static void setaffinity(void)
+{
+	cpu_set_t cpuset;
+	int cpu = 0;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+}
+
+void test_test_overhead(void)
+{
+	const char *kprobe_name = "kprobe/__set_task_comm";
+	const char *kretprobe_name = "kretprobe/__set_task_comm";
+	const char *raw_tp_name = "raw_tp/task_rename";
+	const char *fentry_name = "fentry/__set_task_comm";
+	const char *fexit_name = "fexit/__set_task_comm";
+	const char *kprobe_func = "__set_task_comm";
+	struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
+	struct bpf_program *fentry_prog, *fexit_prog;
+	struct bpf_object *obj;
+	struct bpf_link *link;
+	int err, duration = 0;
+
+	obj = bpf_object__open_file("./test_overhead.o", NULL);
+	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+		return;
+
+	kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+	if (CHECK(!kprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kprobe_name))
+		goto cleanup;
+	kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+	if (CHECK(!kretprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kretprobe_name))
+		goto cleanup;
+	raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name);
+	if (CHECK(!raw_tp_prog, "find_probe",
+		  "prog '%s' not found\n", raw_tp_name))
+		goto cleanup;
+	fentry_prog = bpf_object__find_program_by_title(obj, fentry_name);
+	if (CHECK(!fentry_prog, "find_probe",
+		  "prog '%s' not found\n", fentry_name))
+		goto cleanup;
+	fexit_prog = bpf_object__find_program_by_title(obj, fexit_name);
+	if (CHECK(!fexit_prog, "find_probe",
+		  "prog '%s' not found\n", fexit_name))
+		goto cleanup;
+
+	err = bpf_object__load(obj);
+	if (CHECK(err, "obj_load", "err %d\n", err))
+		goto cleanup;
+
+	setaffinity();
+
+	/* base line run */
+	test_run("base");
+
+	/* attach kprobe */
+	link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
+					  kprobe_func);
+	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("kprobe");
+	bpf_link__destroy(link);
+
+	/* attach kretprobe */
+	link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
+					  kprobe_func);
+	if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("kretprobe");
+	bpf_link__destroy(link);
+
+	/* attach raw_tp */
+	link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
+	if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("raw_tp");
+	bpf_link__destroy(link);
+
+	/* attach fentry */
+	link = bpf_program__attach_trace(fentry_prog);
+	if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("fentry");
+	bpf_link__destroy(link);
+
+	/* attach fexit */
+	link = bpf_program__attach_trace(fexit_prog);
+	if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("fexit");
+	bpf_link__destroy(link);
+cleanup:
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
new file mode 100644
index 000000000000..ef06b2693f96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_tracing.h"
+
+SEC("kprobe/__set_task_comm")
+int prog1(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("kretprobe/__set_task_comm")
+int prog2(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("raw_tp/task_rename")
+int prog3(struct bpf_raw_tracepoint_args *ctx)
+{
+	return 0;
+}
+
+struct __set_task_comm_args {
+	struct task_struct *tsk;
+	const char *buf;
+	__u8 exec; /* bool argument of __set_task_comm() */
+};
+
+SEC("fentry/__set_task_comm")
+int prog4(struct __set_task_comm_args *ctx)
+{
+	return 0;
+}
+
+SEC("fexit/__set_task_comm")
+int prog5(struct __set_task_comm_args *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.23.0


* RE: [PATCH bpf-next] selftests/bpf: Add BPF trampoline performance test
  2019-11-22  1:15 [PATCH bpf-next] selftests/bpf: Add BPF trampoline performance test Alexei Starovoitov
@ 2019-11-22 17:37 ` John Fastabend
  2019-11-22 23:47 ` Daniel Borkmann
  1 sibling, 0 replies; 3+ messages in thread
From: John Fastabend @ 2019-11-22 17:37 UTC
  To: Alexei Starovoitov, davem; +Cc: daniel, netdev, bpf, kernel-team

Alexei Starovoitov wrote:
> Add a test that benchmarks different ways of attaching a BPF program to a
> kernel function. Here are the results for a 2.4GHz x86 CPU on a kernel
> without mitigations:
> $ ./test_progs -n 49 -v|grep events
> task_rename base	2743K events per sec
> task_rename kprobe	2419K events per sec
> task_rename kretprobe	1876K events per sec
> task_rename raw_tp	2578K events per sec
> task_rename fentry	2710K events per sec
> task_rename fexit	2685K events per sec
> 
> On a kernel with retpoline:
> $ ./test_progs -n 49 -v|grep events
> task_rename base	2401K events per sec
> task_rename kprobe	1930K events per sec
> task_rename kretprobe	1485K events per sec
> task_rename raw_tp	2053K events per sec
> task_rename fentry	2351K events per sec
> task_rename fexit	2185K events per sec
> 
> All 5 approaches:
> - kprobe/kretprobe in __set_task_comm()
> - raw tracepoint in trace_task_rename()
> - fentry/fexit in __set_task_comm()
> are roughly equivalent.
> 
> __set_task_comm() by itself is quite fast, so any extra instructions add up.
> Until the BPF trampoline was introduced, the fastest mechanism was the raw
> tracepoint; kprobe via ftrace was second best. kretprobe is slow due to the
> trap. The new fentry/fexit methods via BPF trampoline are clearly the fastest,
> and the difference is more pronounced with retpoline on, since the BPF
> trampoline doesn't use indirect jumps.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  .../selftests/bpf/prog_tests/test_overhead.c  | 142 ++++++++++++++++++
>  .../selftests/bpf/progs/test_overhead.c       |  43 ++++++
>  2 files changed, 185 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/test_overhead.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_overhead.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
> new file mode 100644
> index 000000000000..c32aa28bd93f

Acked-by: John Fastabend <john.fastabend@gmail.com>

* Re: [PATCH bpf-next] selftests/bpf: Add BPF trampoline performance test
  2019-11-22  1:15 [PATCH bpf-next] selftests/bpf: Add BPF trampoline performance test Alexei Starovoitov
  2019-11-22 17:37 ` John Fastabend
@ 2019-11-22 23:47 ` Daniel Borkmann
  1 sibling, 0 replies; 3+ messages in thread
From: Daniel Borkmann @ 2019-11-22 23:47 UTC
  To: Alexei Starovoitov, davem; +Cc: netdev, bpf, kernel-team

On 11/22/19 2:15 AM, Alexei Starovoitov wrote:
> Add a test that benchmarks different ways of attaching a BPF program to a
> kernel function. Here are the results for a 2.4GHz x86 CPU on a kernel
> without mitigations:
> $ ./test_progs -n 49 -v|grep events
> task_rename base	2743K events per sec
> task_rename kprobe	2419K events per sec
> task_rename kretprobe	1876K events per sec
> task_rename raw_tp	2578K events per sec
> task_rename fentry	2710K events per sec
> task_rename fexit	2685K events per sec
> 
> On a kernel with retpoline:
> $ ./test_progs -n 49 -v|grep events
> task_rename base	2401K events per sec
> task_rename kprobe	1930K events per sec
> task_rename kretprobe	1485K events per sec
> task_rename raw_tp	2053K events per sec
> task_rename fentry	2351K events per sec
> task_rename fexit	2185K events per sec
> 
> All 5 approaches:
> - kprobe/kretprobe in __set_task_comm()
> - raw tracepoint in trace_task_rename()
> - fentry/fexit in __set_task_comm()
> are roughly equivalent.
> 
> __set_task_comm() by itself is quite fast, so any extra instructions add up.
> Until the BPF trampoline was introduced, the fastest mechanism was the raw
> tracepoint; kprobe via ftrace was second best. kretprobe is slow due to the
> trap. The new fentry/fexit methods via BPF trampoline are clearly the fastest,
> and the difference is more pronounced with retpoline on, since the BPF
> trampoline doesn't use indirect jumps.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Applied, thanks!
