From: Jiri Olsa <jolsa@kernel.org>
To: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andriin@fb.com>
Cc: netdev@vger.kernel.org, bpf@vger.kernel.org,
	"Martin KaFai Lau" <kafai@fb.com>,
	"Song Liu" <songliubraving@fb.com>, "Yonghong Song" <yhs@fb.com>,
	"John Fastabend" <john.fastabend@gmail.com>,
	"KP Singh" <kpsingh@chromium.org>, "Daniel Xu" <dxu@dxuuu.xyz>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Jesper Brouer" <jbrouer@redhat.com>,
	"Toke Høiland-Jørgensen" <toke@redhat.com>,
	"Viktor Malik" <vmalik@redhat.com>
Subject: [RFC bpf-next 09/16] bpf: Add BPF_TRAMPOLINE_BATCH_ATTACH support
Date: Thu, 22 Oct 2020 10:21:31 +0200	[thread overview]
Message-ID: <20201022082138.2322434-10-jolsa@kernel.org> (raw)
In-Reply-To: <20201022082138.2322434-1-jolsa@kernel.org>

Adding BPF_TRAMPOLINE_BATCH_ATTACH support, which allows attaching
multiple tracing fentry/fexit programs to trampolines within one
syscall.

Currently each tracing program is attached in a separate bpf syscall
and, more importantly, by a separate register_ftrace_direct call, which
registers the trampoline in the ftrace subsystem. We can save some
cycles by simply using its batch variant, register_ftrace_direct_ips.
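
For reference (with illustrative variable names), the call-site change
this enables boils down to replacing one ftrace registration per
attached program with a single registration for the whole collected
batch, using register_ftrace_direct_ips added earlier in this series:

  /* current code: one ftrace registration per attached program */
  register_ftrace_direct((long)ip, (long)trampoline_addr);

  /* batched: register all collected probe ips in one call */
  register_ftrace_direct_ips(ips, addrs, count);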

Before:

 Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s*
    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

     2,199,433,771      cycles:k               ( +-  0.55% )
       936,105,469      cycles:u               ( +-  0.37% )

             26.48 +- 3.57 seconds time elapsed  ( +- 13.49% )

After:

 Performance counter stats for './src/bpftrace -ve kfunc:__x64_sys_s*
    { printf("test\n"); } i:ms:10 { printf("exit\n"); exit();}' (5 runs):

     1,456,854,867      cycles:k               ( +-  0.57% )
       937,737,431      cycles:u               ( +-  0.13% )

             12.44 +- 2.98 seconds time elapsed  ( +- 23.95% )

The new BPF_TRAMPOLINE_BATCH_ATTACH syscall command expects the
following data in union bpf_attr:

  struct {
          __aligned_u64   in;
          __aligned_u64   out;
          __u32           count;
  } trampoline_batch;

  in    - pointer to user space array with file descriptors of loaded
          bpf programs to attach
  out   - pointer to user space array for the resulting link file
          descriptors
  count - number of 'in/out' file descriptors

Basically the new code gets the programs from the 'in' file descriptors
and attaches them the same way the current code does, apart from the
last step that registers the probe ip with the trampoline. That is done
at the end, for all programs at once, with the new
register_ftrace_direct_ips function.

The resulting link file descriptors are written to the 'out' array and
match the order of the 'in' array file descriptors.
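
For illustration only, a minimal user space sketch of issuing the new
command through a raw bpf(2) syscall; the wrapper name and the fd
arrays are hypothetical, and BPF_TRAMPOLINE_BATCH_ATTACH is only
visible with this patch's uapi header (libbpf support comes later in
this series, in patch 13/16):

  #include <linux/bpf.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* Attach 'count' loaded fentry/fexit programs in one syscall.
   * On success (return 0), link_fds[] is filled with link file
   * descriptors in the same order as prog_fds[].
   */
  static int trampoline_batch_attach(const __u32 *prog_fds,
                                     __u32 *link_fds, __u32 count)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.trampoline_batch.in    = (__u64)(unsigned long)prog_fds;
          attr.trampoline_batch.out   = (__u64)(unsigned long)link_fds;
          attr.trampoline_batch.count = count;

          return syscall(__NR_bpf, BPF_TRAMPOLINE_BATCH_ATTACH,
                         &attr, sizeof(attr));
  }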

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h      | 15 ++++++-
 include/uapi/linux/bpf.h |  7 ++++
 kernel/bpf/syscall.c     | 88 ++++++++++++++++++++++++++++++++++++++--
 kernel/bpf/trampoline.c  | 69 +++++++++++++++++++++++++++----
 4 files changed, 164 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b16bf48aab6..d28c7ac3af3f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -583,6 +583,13 @@ enum bpf_tramp_prog_type {
 	BPF_TRAMP_REPLACE, /* more than MAX */
 };
 
+struct bpf_trampoline_batch {
+	int count;
+	int idx;
+	unsigned long *ips;
+	unsigned long *addrs;
+};
+
 struct bpf_trampoline {
 	/* hlist for trampoline_table */
 	struct hlist_node hlist;
@@ -644,11 +651,14 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
 	return bpf_func(ctx, insnsi);
 }
 #ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr,
+			     struct bpf_trampoline_batch *batch);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+struct bpf_trampoline_batch *bpf_trampoline_batch_alloc(int count);
+void bpf_trampoline_batch_free(struct bpf_trampoline_batch *batch);
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
@@ -693,7 +703,8 @@ void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
-					   struct bpf_trampoline *tr)
+					   struct bpf_trampoline *tr,
+					   struct bpf_trampoline_batch *batch)
 {
 	return -ENOTSUPP;
 }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bf5a99d803e4..04df4d576fd4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -125,6 +125,7 @@ enum bpf_cmd {
 	BPF_ITER_CREATE,
 	BPF_LINK_DETACH,
 	BPF_PROG_BIND_MAP,
+	BPF_TRAMPOLINE_BATCH_ATTACH,
 };
 
 enum bpf_map_type {
@@ -631,6 +632,12 @@ union bpf_attr {
 		__u32 prog_fd;
 	} raw_tracepoint;
 
+	struct { /* anonymous struct used by BPF_TRAMPOLINE_BATCH_ATTACH */
+		__aligned_u64	in;
+		__aligned_u64	out;
+		__u32		count;
+	} trampoline_batch;
+
 	struct { /* anonymous struct for BPF_BTF_LOAD */
 		__aligned_u64	btf;
 		__aligned_u64	btf_log_buf;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 61ef29f9177d..e370b37e3e8e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2553,7 +2553,8 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
 
 static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 				   int tgt_prog_fd,
-				   u32 btf_id)
+				   u32 btf_id,
+				   struct bpf_trampoline_batch *batch)
 {
 	struct bpf_link_primer link_primer;
 	struct bpf_prog *tgt_prog = NULL;
@@ -2678,7 +2679,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	if (err)
 		goto out_unlock;
 
-	err = bpf_trampoline_link_prog(prog, tr);
+	err = bpf_trampoline_link_prog(prog, tr, batch);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		link = NULL;
@@ -2826,7 +2827,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 			tp_name = prog->aux->attach_func_name;
 			break;
 		}
-		return bpf_tracing_prog_attach(prog, 0, 0);
+		return bpf_tracing_prog_attach(prog, 0, 0, NULL);
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 		if (strncpy_from_user(buf,
@@ -2879,6 +2880,81 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	return err;
 }
 
+#define BPF_RAW_TRACEPOINT_OPEN_BATCH_LAST_FIELD trampoline_batch.count
+
+static int bpf_trampoline_batch(const union bpf_attr *attr, int cmd)
+{
+	void __user *uout = u64_to_user_ptr(attr->trampoline_batch.out);
+	void __user *uin = u64_to_user_ptr(attr->trampoline_batch.in);
+	struct bpf_trampoline_batch *batch = NULL;
+	struct bpf_prog *prog;
+	int count, ret, i, fd;
+	u32 *in, *out;
+
+	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN_BATCH))
+		return -EINVAL;
+
+	if (!uin || !uout)
+		return -EINVAL;
+
+	count = attr->trampoline_batch.count;
+
+	in = kcalloc(count, sizeof(u32), GFP_KERNEL);
+	out = kcalloc(count, sizeof(u32), GFP_KERNEL);
+	if (!in || !out) {
+		kfree(in);
+		kfree(out);
+		return -ENOMEM;
+	}
+
+	ret = copy_from_user(in, uin, count * sizeof(u32));
+	if (ret)
+		goto out_clean;
+
+	/* test read out array */
+	ret = copy_to_user(uout, out, count * sizeof(u32));
+	if (ret)
+		goto out_clean;
+
+	batch = bpf_trampoline_batch_alloc(count);
+	if (!batch)
+		goto out_clean;
+
+	for (i = 0; i < count; i++) {
+		if (cmd == BPF_TRAMPOLINE_BATCH_ATTACH) {
+			prog = bpf_prog_get(in[i]);
+			if (IS_ERR(prog)) {
+				ret = PTR_ERR(prog);
+				goto out_clean;
+			}
+
+			ret = -EINVAL;
+			if (prog->type != BPF_PROG_TYPE_TRACING)
+				goto out_clean;
+			if (prog->type == BPF_PROG_TYPE_TRACING &&
+			    prog->expected_attach_type == BPF_TRACE_RAW_TP)
+				goto out_clean;
+
+			fd = bpf_tracing_prog_attach(prog, 0, 0, batch);
+			if (fd < 0)
+				goto out_clean;
+
+			out[i] = fd;
+		}
+	}
+
+	ret = register_ftrace_direct_ips(batch->ips, batch->addrs, batch->idx);
+	if (!ret)
+		WARN_ON_ONCE(copy_to_user(uout, out, count * sizeof(u32)));
+
+out_clean:
+	/* XXX cleanup partially attached array */
+	bpf_trampoline_batch_free(batch);
+	kfree(in);
+	kfree(out);
+	return ret;
+}
+
 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
@@ -4018,7 +4094,8 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 	else if (prog->type == BPF_PROG_TYPE_EXT)
 		return bpf_tracing_prog_attach(prog,
 					       attr->link_create.target_fd,
-					       attr->link_create.target_btf_id);
+					       attr->link_create.target_btf_id,
+					       NULL);
 	return -EINVAL;
 }
 
@@ -4437,6 +4514,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_RAW_TRACEPOINT_OPEN:
 		err = bpf_raw_tracepoint_open(&attr);
 		break;
+	case BPF_TRAMPOLINE_BATCH_ATTACH:
+		err = bpf_trampoline_batch(&attr, cmd);
+		break;
 	case BPF_BTF_LOAD:
 		err = bpf_btf_load(&attr);
 		break;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 35c5887d82ff..3383644eccc8 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -107,6 +107,51 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 	return tr;
 }
 
+static int bpf_trampoline_batch_add(struct bpf_trampoline_batch *batch,
+				    unsigned long ip, unsigned long addr)
+{
+	int idx = batch->idx;
+
+	if (idx >= batch->count)
+		return -EINVAL;
+
+	batch->ips[idx] = ip;
+	batch->addrs[idx] = addr;
+	batch->idx++;
+	return 0;
+}
+
+struct bpf_trampoline_batch *bpf_trampoline_batch_alloc(int count)
+{
+	struct bpf_trampoline_batch *batch;
+
+	batch = kmalloc(sizeof(*batch), GFP_KERNEL);
+	if (!batch)
+		return NULL;
+
+	batch->ips = kcalloc(count, sizeof(batch->ips[0]), GFP_KERNEL);
+	batch->addrs = kcalloc(count, sizeof(batch->addrs[0]), GFP_KERNEL);
+	if (!batch->ips || !batch->addrs) {
+		kfree(batch->ips);
+		kfree(batch->addrs);
+		kfree(batch);
+		return NULL;
+	}
+
+	batch->count = count;
+	batch->idx = 0;
+	return batch;
+}
+
+void bpf_trampoline_batch_free(struct bpf_trampoline_batch *batch)
+{
+	if (!batch)
+		return;
+	kfree(batch->ips);
+	kfree(batch->addrs);
+	kfree(batch);
+}
+
 static int is_ftrace_location(void *ip)
 {
 	long addr;
@@ -144,7 +189,8 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad
 }
 
 /* first time registering */
-static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
+static int register_fentry(struct bpf_trampoline *tr, void *new_addr,
+			   struct bpf_trampoline_batch *batch)
 {
 	void *ip = tr->func.addr;
 	int ret;
@@ -154,9 +200,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 		return ret;
 	tr->func.ftrace_managed = ret;
 
-	if (tr->func.ftrace_managed)
-		ret = register_ftrace_direct((long)ip, (long)new_addr);
-	else
+	if (tr->func.ftrace_managed) {
+		if (batch)
+			ret = bpf_trampoline_batch_add(batch, (long)ip, (long)new_addr);
+		else
+			ret = register_ftrace_direct((long)ip, (long)new_addr);
+	} else
 		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
 	return ret;
 }
@@ -185,7 +234,8 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
 	return tprogs;
 }
 
-static int bpf_trampoline_update(struct bpf_trampoline *tr)
+static int bpf_trampoline_update(struct bpf_trampoline *tr,
+				 struct bpf_trampoline_batch *batch)
 {
 	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
 	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
@@ -230,7 +280,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 		err = modify_fentry(tr, old_image, new_image);
 	else
 		/* first time registering */
-		err = register_fentry(tr, new_image);
+		err = register_fentry(tr, new_image, batch);
 	if (err)
 		goto out;
 	tr->selector++;
@@ -261,7 +311,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 	}
 }
 
-int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr,
+			     struct bpf_trampoline_batch *batch)
 {
 	enum bpf_tramp_prog_type kind;
 	int err = 0;
@@ -299,7 +350,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 	}
 	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
 	tr->progs_cnt[kind]++;
-	err = bpf_trampoline_update(tr);
+	err = bpf_trampoline_update(tr, batch);
 	if (err) {
 		hlist_del(&prog->aux->tramp_hlist);
 		tr->progs_cnt[kind]--;
@@ -326,7 +377,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 	}
 	hlist_del(&prog->aux->tramp_hlist);
 	tr->progs_cnt[kind]--;
-	err = bpf_trampoline_update(tr);
+	err = bpf_trampoline_update(tr, NULL);
 out:
 	mutex_unlock(&tr->mutex);
 	return err;
-- 
2.26.2


Thread overview: 47+ messages
2020-10-22  8:21 [RFC bpf-next 00/16] bpf: Speed up trampoline attach Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 01/16] ftrace: Add check_direct_entry function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 02/16] ftrace: Add adjust_direct_size function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 03/16] ftrace: Add get/put_direct_func function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 04/16] ftrace: Add ftrace_set_filter_ips function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 05/16] ftrace: Add register_ftrace_direct_ips function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 06/16] ftrace: Add unregister_ftrace_direct_ips function Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 07/16] kallsyms: Use rb tree for kallsyms name search Jiri Olsa
2020-10-28 18:25   ` Jiri Olsa
2020-10-28 21:15     ` Alexei Starovoitov
2020-10-29  9:29       ` Jiri Olsa
2020-10-29 22:45         ` Andrii Nakryiko
2020-10-28 22:40     ` Andrii Nakryiko
2020-10-29  9:33       ` Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 08/16] bpf: Use delayed link free in bpf_link_put Jiri Olsa
2020-10-23 19:46   ` Andrii Nakryiko
2020-10-25 19:02     ` Jiri Olsa
2020-10-22  8:21 ` Jiri Olsa [this message]
2020-10-23 20:03   ` [RFC bpf-next 09/16] bpf: Add BPF_TRAMPOLINE_BATCH_ATTACH support Andrii Nakryiko
2020-10-23 20:31     ` Steven Rostedt
2020-10-23 22:23       ` Andrii Nakryiko
2020-10-25 19:41         ` Jiri Olsa
2020-10-26 23:19           ` Andrii Nakryiko
2020-10-22  8:21 ` [RFC bpf-next 10/16] bpf: Add BPF_TRAMPOLINE_BATCH_DETACH support Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 11/16] bpf: Sync uapi bpf.h to tools Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 12/16] bpf: Move synchronize_rcu_mult for batch processing (NOT TO BE MERGED) Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 13/16] libbpf: Add trampoline batch attach support Jiri Olsa
2020-10-23 20:09   ` Andrii Nakryiko
2020-10-25 19:11     ` Jiri Olsa
2020-10-26 23:15       ` Andrii Nakryiko
2020-10-27 19:03         ` Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 14/16] libbpf: Add trampoline batch detach support Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 15/16] selftests/bpf: Add trampoline batch test Jiri Olsa
2020-10-22  8:21 ` [RFC bpf-next 16/16] selftests/bpf: Add attach batch test (NOT TO BE MERGED) Jiri Olsa
2020-10-22 13:35 ` [RFC bpf-next 00/16] bpf: Speed up trampoline attach Steven Rostedt
2020-10-22 14:11   ` Jiri Olsa
2020-10-22 14:42     ` Steven Rostedt
2020-10-22 16:21       ` Steven Rostedt
2020-10-22 20:52         ` Steven Rostedt
2020-10-23  6:09           ` Jiri Olsa
2020-10-23 13:50             ` Steven Rostedt
2020-10-25 19:01               ` Jiri Olsa
2020-10-27  4:30       ` Alexei Starovoitov
2020-10-27 13:14         ` Steven Rostedt
2020-10-27 14:28         ` Jiri Olsa
2020-10-28 21:13           ` Alexei Starovoitov
2020-10-29 11:09             ` Jiri Olsa
