All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: davem@davemloft.net
Cc: daniel@iogearbox.net, andrii@kernel.org, bpf@vger.kernel.org,
	kernel-team@fb.com
Subject: [PATCH v4 bpf-next 07/16] bpf: Add bpf_core_add_cands() and wire it into bpf_core_apply_relo_insn().
Date: Tue, 23 Nov 2021 22:02:00 -0800	[thread overview]
Message-ID: <20211124060209.493-8-alexei.starovoitov@gmail.com> (raw)
In-Reply-To: <20211124060209.493-1-alexei.starovoitov@gmail.com>

From: Alexei Starovoitov <ast@kernel.org>

Given BPF program's BTF root type name perform the following steps:
. search in vmlinux candidate cache.
. if (present in cache and candidate list >= 1) return candidate list.
. do a linear search through kernel BTFs for possible candidates.
. regardless of number of candidates found populate vmlinux cache.
. if (candidate list >= 1) return candidate list.
. search in module candidate cache.
. if (present in cache) return candidate list (even if list is empty).
. do a linear search through BTFs of all kernel modules
  collecting candidates from all of them.
. regardless of number of candidates found populate module cache.
. return candidate list.
Then wire the result into bpf_core_apply_relo_insn().

When BPF program is trying to CO-RE relocate a type
that doesn't exist in either vmlinux BTF or in modules BTFs
these steps will perform 2 cache lookups when cache is hit.

Note the cache doesn't prevent the abuse by the program that might
have lots of relocations that cannot be resolved. Hence cond_resched().

CO-RE in the kernel requires CAP_BPF, since BTF loading requires it.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c          | 250 +++++++++++++++++++++++++++++++++++++-
 tools/lib/bpf/relo_core.h |   2 +
 2 files changed, 251 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dbf1f389b1d3..cf971b8a0769 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -25,6 +25,7 @@
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <net/sock.h>
+#include "../tools/lib/bpf/relo_core.h"
 
 /* BTF (BPF Type Format) is the meta data format which describes
  * the data types of BPF program/map.  Hence, it basically focus
@@ -6169,6 +6170,8 @@ btf_module_read(struct file *file, struct kobject *kobj,
 	return len;
 }
 
+static void purge_cand_cache(struct btf *btf);
+
 static int btf_module_notify(struct notifier_block *nb, unsigned long op,
 			     void *module)
 {
@@ -6245,6 +6248,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
 			list_del(&btf_mod->list);
 			if (btf_mod->sysfs_attr)
 				sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
+			purge_cand_cache(btf_mod->btf);
 			btf_put(btf_mod->btf);
 			kfree(btf_mod->sysfs_attr);
 			kfree(btf_mod);
@@ -6440,8 +6444,252 @@ size_t bpf_core_essential_name_len(const char *name)
 	return n;
 }
 
+struct bpf_cand_cache {
+	const char *name;
+	u32 name_len;
+	u16 kind;
+	u16 cnt;
+	struct {
+		const struct btf *btf;
+		u32 id;
+	} cands[];
+};
+
+static void bpf_free_cands(struct bpf_cand_cache *cands)
+{
+	kfree(cands->name);
+	kfree(cands);
+}
+
+#define VMLINUX_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE];
+
+#define MODULE_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE];
+
+static DEFINE_MUTEX(cand_cache_mutex);
+
+static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands,
+					       struct bpf_cand_cache **cache,
+					       int cache_size)
+{
+	u32 hash = jhash_2words(cands->name_len,
+				(((u32) cands->name[0]) << 8) | cands->name[1], 0);
+	struct bpf_cand_cache *cc = cache[hash % cache_size];
+
+	if (cc && cc->name_len == cands->name_len &&
+	    !strncmp(cc->name, cands->name, cands->name_len))
+		return cc;
+	return NULL;
+}
+
+static void populate_cand_cache(struct bpf_cand_cache *cands,
+				struct bpf_cand_cache **cache,
+				int cache_size)
+{
+	u32 hash = jhash_2words(cands->name_len,
+				(((u32) cands->name[0]) << 8) | cands->name[1], 0);
+	struct bpf_cand_cache *cc = cache[hash % cache_size];
+
+	if (cc)
+		bpf_free_cands(cc);
+	cache[hash % cache_size] = cands;
+}
+
+static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
+			       int cache_size)
+{
+	struct bpf_cand_cache *cc;
+	int i, j;
+
+	for (i = 0; i < cache_size; i++) {
+		cc = cache[i];
+		if (!cc)
+			continue;
+		for (j = 0; j < cc->cnt; j++)
+			if (cc->cands[j].btf == btf) {
+				bpf_free_cands(cc);
+				cache[i] = NULL;
+				break;
+			}
+	}
+
+}
+
+static void purge_cand_cache(struct btf *btf)
+{
+	mutex_lock(&cand_cache_mutex);
+	__purge_cand_cache(btf, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+	__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	mutex_unlock(&cand_cache_mutex);
+}
+
+static struct bpf_cand_cache *
+bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
+		   int targ_start_id)
+{
+	struct bpf_cand_cache *new_cands;
+	const struct btf_type *t;
+	const char *targ_name;
+	size_t targ_essent_len;
+	int n, i;
+
+	n = btf_nr_types(targ_btf);
+	for (i = targ_start_id; i < n; i++) {
+		t = btf_type_by_id(targ_btf, i);
+		if (btf_kind(t) != cands->kind)
+			continue;
+
+		targ_name = btf_name_by_offset(targ_btf, t->name_off);
+		if (!targ_name)
+			continue;
+
+		cond_resched();
+
+		if (strncmp(cands->name, targ_name, cands->name_len) != 0)
+			continue;
+
+		targ_essent_len = bpf_core_essential_name_len(targ_name);
+		if (targ_essent_len != cands->name_len)
+			continue;
+
+		new_cands = krealloc(cands,
+				     offsetof(struct bpf_cand_cache, cands[cands->cnt + 1]),
+				     GFP_KERNEL);
+		if (!new_cands) {
+			bpf_free_cands(cands);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		cands = new_cands;
+		cands->cands[cands->cnt].btf = targ_btf;
+		cands->cands[cands->cnt].id = i;
+		cands->cnt++;
+	}
+	return cands;
+}
+
+static struct bpf_cand_cache *
+bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
+{
+	const struct btf *local_btf = ctx->btf;
+	const struct btf_type *local_type;
+	const struct btf *main_btf;
+	size_t local_essent_len;
+	struct bpf_cand_cache *cands, *cc;
+	struct btf *mod_btf;
+	const char *name;
+	int id;
+
+	local_type = btf_type_by_id(local_btf, local_type_id);
+	if (!local_type)
+		return ERR_PTR(-EINVAL);
+
+	name = btf_name_by_offset(local_btf, local_type->name_off);
+	if (str_is_empty(name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(name);
+
+	cands = kcalloc(1, sizeof(*cands), GFP_KERNEL);
+	if (!cands)
+		return ERR_PTR(-ENOMEM);
+	cands->name = kmemdup_nul(name, local_essent_len, GFP_KERNEL);
+	if (!cands->name) {
+		kfree(cands);
+		return ERR_PTR(-ENOMEM);
+	}
+	cands->kind = btf_kind(local_type);
+	cands->name_len = local_essent_len;
+
+	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+	if (cc) {
+		if (cc->cnt) {
+			bpf_free_cands(cands);
+			return cc;
+		}
+		goto check_modules;
+	}
+
+	/* Attempt to find target candidates in vmlinux BTF first */
+	main_btf = bpf_get_btf_vmlinux();
+	cands = bpf_core_add_cands(cands, main_btf, 1);
+	if (IS_ERR(cands))
+		return cands;
+
+	/* populate cache even when cands->cnt == 0 */
+	populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+
+	/* if vmlinux BTF has any candidate, don't go for module BTFs */
+	if (cands->cnt)
+		return cands;
+
+check_modules:
+	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	if (cc) {
+		bpf_free_cands(cands);
+		/* if cache has it return it even if cc->cnt == 0 */
+		return cc;
+	}
+
+	/* If candidate is not found in vmlinux's BTF then search in module's BTFs */
+	spin_lock_bh(&btf_idr_lock);
+	idr_for_each_entry(&btf_idr, mod_btf, id) {
+		if (!btf_is_module(mod_btf))
+			continue;
+		/* linear search could be slow hence unlock/lock
+		 * the IDR to avoiding holding it for too long
+		 */
+		btf_get(mod_btf);
+		spin_unlock_bh(&btf_idr_lock);
+		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
+		if (IS_ERR(cands)) {
+			btf_put(mod_btf);
+			return cands;
+		}
+		spin_lock_bh(&btf_idr_lock);
+		btf_put(mod_btf);
+	}
+	spin_unlock_bh(&btf_idr_lock);
+	/* populate cache even when cands->cnt == 0 */
+	populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	return cands;
+}
+
 int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 		   int relo_idx, void *insn)
 {
-	return -EOPNOTSUPP;
+	struct bpf_core_cand_list cands = {};
+	int err;
+
+	if (relo->kind != BPF_CORE_TYPE_ID_LOCAL) {
+		struct bpf_cand_cache *cc;
+		int i;
+
+		mutex_lock(&cand_cache_mutex);
+		cc = bpf_core_find_cands(ctx, relo->type_id);
+		if (IS_ERR(cc)) {
+			bpf_log(ctx->log, "target candidate search failed for %d\n",
+				relo->type_id);
+			return PTR_ERR(cc);
+		}
+		if (cc->cnt) {
+			cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
+			if (!cands.cands)
+				return -ENOMEM;
+		}
+		for (i = 0; i < cc->cnt; i++) {
+			bpf_log(ctx->log,
+				"CO-RE relocating %s %s: found target candidate [%d]\n",
+				btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
+			cands.cands[i].btf = cc->cands[i].btf;
+			cands.cands[i].id = cc->cands[i].id;
+		}
+		cands.len = cc->cnt;
+		mutex_unlock(&cand_cache_mutex);
+	}
+
+	err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8,
+				       relo, relo_idx, ctx->btf, &cands);
+	kfree(cands.cands);
+	return err;
 }
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index f410691cc4e5..f7b0d698978c 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -8,8 +8,10 @@
 
 struct bpf_core_cand {
 	const struct btf *btf;
+#ifndef __KERNEL__
 	const struct btf_type *t;
 	const char *name;
+#endif
 	__u32 id;
 };
 
-- 
2.30.2


  parent reply	other threads:[~2021-11-24  6:02 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-24  6:01 [PATCH v4 bpf-next 00/16] bpf: CO-RE support in the kernel Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 01/16] libbpf: Replace btf__type_by_id() with btf_type_by_id() Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 02/16] bpf: Rename btf_member accessors Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 03/16] bpf: Prepare relo_core.c for kernel duty Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 04/16] bpf: Define enum bpf_core_relo_kind as uapi Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 05/16] bpf: Pass a set of bpf_core_relo-s to prog_load command Alexei Starovoitov
2021-11-24  6:01 ` [PATCH v4 bpf-next 06/16] bpf: Adjust BTF log size limit Alexei Starovoitov
2021-11-24  6:02 ` Alexei Starovoitov [this message]
2021-11-30  1:03   ` [PATCH v4 bpf-next 07/16] bpf: Add bpf_core_add_cands() and wire it into bpf_core_apply_relo_insn() Andrii Nakryiko
2021-11-30  3:18     ` Alexei Starovoitov
2021-11-30  4:09       ` Andrii Nakryiko
2021-11-30  5:04         ` Alexei Starovoitov
2021-11-30  5:14           ` Andrii Nakryiko
2021-11-30 23:06         ` Alexei Starovoitov
2021-11-30 23:12           ` Andrii Nakryiko
2021-11-30  5:50   ` Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 08/16] libbpf: Use CO-RE in the kernel in light skeleton Alexei Starovoitov
2021-11-30  1:11   ` Andrii Nakryiko
2021-11-24  6:02 ` [PATCH v4 bpf-next 09/16] libbpf: Support init of inner maps " Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 10/16] libbpf: Clean gen_loader's attach kind Alexei Starovoitov
2021-11-30  1:11   ` Andrii Nakryiko
2021-11-24  6:02 ` [PATCH v4 bpf-next 11/16] selftests/bpf: Add lskel version of kfunc test Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 12/16] selftests/bpf: Improve inner_map test coverage Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 13/16] selftests/bpf: Convert map_ptr_kern test to use light skeleton Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 14/16] selftests/bpf: Additional test for CO-RE in the kernel Alexei Starovoitov
2021-11-30  1:13   ` Andrii Nakryiko
2021-11-24  6:02 ` [PATCH v4 bpf-next 15/16] selftests/bpf: Revert CO-RE removal in test_ksyms_weak Alexei Starovoitov
2021-11-24  6:02 ` [PATCH v4 bpf-next 16/16] selftests/bpf: Add CO-RE relocations to verifier scale test Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211124060209.493-8-alexei.starovoitov@gmail.com \
    --to=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=kernel-team@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.