From: Jiri Olsa <jolsa@redhat.com>
To: "Björn Töpel" <bjorn.topel@intel.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Jiri Olsa <jolsa@kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	Andrii Nakryiko <andriin@fb.com>, Yonghong Song <yhs@fb.com>,
	Martin KaFai Lau <kafai@fb.com>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	David Miller <davem@redhat.com>
Subject: Re: [PATCH 5/5] bpf: Allow to resolve bpf trampoline in unwind
Date: Mon, 13 Jan 2020 10:43:10 +0100	[thread overview]
Message-ID: <20200113094310.GE35080@krava> (raw)
In-Reply-To: <76a10338-391a-ffca-9af8-f407265d146a@intel.com>

On Tue, Jan 07, 2020 at 02:30:35PM +0100, Björn Töpel wrote:
> On 2020-01-07 14:05, Jiri Olsa wrote:
> > On Mon, Jan 06, 2020 at 03:46:40PM -0800, Alexei Starovoitov wrote:
> > > On Sun, Dec 29, 2019 at 03:37:40PM +0100, Jiri Olsa wrote:
> > > > When unwinding the stack we need to identify each
> > > > address to successfully continue. Adding latch tree
> > > > to keep trampolines for quick lookup during the
> > > > unwind.
> > > > 
> > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > ...
> > > > +bool is_bpf_trampoline(void *addr)
> > > > +{
> > > > +	return latch_tree_find(addr, &tree, &tree_ops) != NULL;
> > > > +}
> > > > +
> > > >   struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > > >   {
> > > >   	struct bpf_trampoline *tr;
> > > > @@ -65,6 +98,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
> > > >   	for (i = 0; i < BPF_TRAMP_MAX; i++)
> > > >   		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
> > > >   	tr->image = image;
> > > > +	latch_tree_insert(&tr->tnode, &tree, &tree_ops);
> > > 
> > > Thanks for the fix. I was thinking to apply it, but then realized that bpf
> > > dispatcher logic has the same issue.
> > > Could you generalize the fix for both?
> > > May be bpf_jit_alloc_exec_page() can do latch_tree_insert() ?
> > > and new version of bpf_jit_free_exec() is needed that will do latch_tree_erase().
> > > Wdyt?
> > 
> > I need to check the dispatcher code, but seems ok.. will check
> > 
> 
> Thanks Jiri! The trampoline and dispatcher share the image allocation, so
> putting it there would make sense.
> 
> It's annoying that the dispatcher doesn't show up correctly in perf, and
> it's been on my list to fix that. Hopefully you beat me to it! :-D

hi,
the attached patch seems to work for me (trampoline use case), but I don't know
how to test it for the dispatcher.. also I need to check whether we need to
decrease BPF_TRAMP_MAX or BPF_DISPATCHER_MAX, it might take more time ;-)
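
fwiw a back-of-envelope check on the sizing, with my numbers (assuming
x86-64 with 4k pages, struct latch_tree_node being two struct rb_node,
i.e. 2 * 24 = 48 bytes, and the ~50 bytes per call from the existing
comment in trampoline.c):

	BPF_IMAGE_SIZE            = PAGE_SIZE - sizeof(struct bpf_image)
	                          = 4096 - 48 = 4048 bytes
	half of the image         = 4048 / 2 = 2024 bytes
	BPF_MAX_TRAMP_PROGS * ~50 = 40 * 50 = 2000 bytes <= 2024

so it looks like the current limit still (barely) fits, but that needs to
be verified against the actual generated code size.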

jirka


---
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aed2bc39d72b..e0ca8780dc7a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -510,7 +510,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) {			\
 	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 	.func = &name##func,				\
@@ -542,6 +541,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
+struct bpf_image {
+	struct latch_tree_node tnode;
+	unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image(void *addr);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -563,6 +569,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
 					      struct bpf_prog *from,
 					      struct bpf_prog *to) {}
+static inline bool is_bpf_image(void *addr)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 29d47aae0dd1..53dc3adf6077 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -704,6 +704,8 @@ bool is_bpf_text_address(unsigned long addr)
 
 	rcu_read_lock();
 	ret = bpf_prog_kallsyms_find(addr) != NULL;
+	if (!ret)
+		ret = is_bpf_image((void *)addr);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 204ee61a3904..b3e5b214fed8 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (PAGE_SIZE / 2);
+		noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
 	}
 
 	new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 
 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_jit_alloc_exec_page();
+		d->image = bpf_image_alloc();
 		if (!d->image)
 			goto out;
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 79a04417050d..3ea56f89c68a 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>
 
 /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
 #define TRAMPOLINE_HASH_BITS 10
@@ -14,7 +15,12 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
-void *bpf_jit_alloc_exec_page(void)
+static struct latch_tree_root image_tree __cacheline_aligned;
+
+/* serializes access to image_tree */
+static DEFINE_MUTEX(image_mutex);
+
+static void *bpf_jit_alloc_exec_page(void)
 {
 	void *image;
 
@@ -30,6 +36,62 @@ void *bpf_jit_alloc_exec_page(void)
 	return image;
 }
 
+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+				      struct latch_tree_node *b)
+{
+	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+	return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+	void *image = container_of(n, struct bpf_image, tnode);
+
+	if (addr < image)
+		return -1;
+	if (addr >= image + PAGE_SIZE)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+	.less	= image_tree_less,
+	.comp	= image_tree_comp,
+};
+
+void *bpf_image_alloc(void)
+{
+	struct bpf_image *image;
+
+	image = bpf_jit_alloc_exec_page();
+	if (!image)
+		return NULL;
+
+	mutex_lock(&image_mutex);
+	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+	mutex_unlock(&image_mutex);
+	return image->data;
+}
+
+void bpf_image_delete(void *ptr)
+{
+	struct bpf_image *image = container_of(ptr, struct bpf_image, data);
+
+	mutex_lock(&image_mutex);
+	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
+	synchronize_rcu();
+	bpf_jit_free_exec(image);
+	mutex_unlock(&image_mutex);
+}
+
+bool is_bpf_image(void *addr)
+{
+	return latch_tree_find(addr, &image_tree, &image_tree_ops) != NULL;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -50,7 +112,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec_page();
+	image = bpf_image_alloc();
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
@@ -125,14 +187,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86.  Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
 	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
 	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
 	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -160,7 +222,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	if (fexit_cnt)
 		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
-	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
 					  &tr->func.model, flags,
 					  fentry, fentry_cnt,
 					  fexit, fexit_cnt,
@@ -251,7 +313,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 		goto out;
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
-	bpf_jit_free_exec(tr->image);
+	bpf_image_delete(tr->image);
 	hlist_del(&tr->hlist);
 	kfree(tr);
 out:

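
For completeness, a sketch of the intended lifecycle with the new helpers
(this just restates what the patch above does; example_image_lifecycle is
a made-up caller, not a real function):

	/* hypothetical caller, for illustration only */
	static void example_image_lifecycle(void)
	{
		void *image;

		/* allocate an executable page with the struct bpf_image
		 * header reserved and insert it into image_tree under
		 * image_mutex; from now on the unwinder can resolve any
		 * address in the page via is_bpf_text_address() ->
		 * is_bpf_image()
		 */
		image = bpf_image_alloc();
		if (!image)
			return;

		/* ... JIT trampoline/dispatcher code into image ... */

		/* erase from the tree, wait for RCU readers that might
		 * still be unwinding through the page, then free it
		 */
		bpf_image_delete(image);
	}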

