From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Dave Marchevsky <davemarchevsky@fb.com>,
	Delyan Kratunov <delyank@fb.com>
Subject: [PATCH RFC bpf-next v1 13/32] bpf: Introduce bpf_list_head support for BPF maps
Date: Sun,  4 Sep 2022 22:41:26 +0200	[thread overview]
Message-ID: <20220904204145.3089-14-memxor@gmail.com> (raw)
In-Reply-To: <20220904204145.3089-1-memxor@gmail.com>

Add the basic map-side support to parse, recognize, verify, and build a
metadata table for a new special field of type struct bpf_list_head. To
parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.

The definition of bpf_list_head in a map value will be done as follows:

struct foo {
	int data;
	struct bpf_list_node node;
};

struct map_value {
	struct bpf_list_head list __contains(struct, foo, node);
};

Then, the bpf_list_head 'list' only allows adding entries of type
struct foo, linked through their bpf_list_node member 'node'.

The 'contains' annotation is a BTF declaration tag composed of four
parts, "contains:kind:name:node", where kind and name are used to look
up the value type in the map BTF. The node part names the member of
that type which has type struct bpf_list_node and is actually used to
link the value into the linked list.
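
As a point of reference, such a tag could be emitted from BPF program C
code with a macro along the following lines. This is only a sketch (the
actual macro for selftests is added in a later patch of this series),
so the exact definition here is an assumption:

/* Hypothetical helper macro: emits the "contains:kind:name:node" BTF
 * declaration tag that btf_find_decl_tag_value() looks for. It relies
 * on clang's btf_decl_tag attribute; the real selftests macro may
 * differ in name and form.
 */
#define __contains(kind, name, node) \
	__attribute__((btf_decl_tag("contains:" #kind ":" #name ":" #node)))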

This allows building intrusive linked lists in BPF, using container_of
to obtain a pointer to the entry, while staying completely type safe
from the verifier's perspective. The verifier knows the exact type of
the nodes, and knows that list helpers return that type at the fixed
offset where the bpf_list_node member used for this list is located.
The verifier also uses this information to reject adding types that a
given list does not accept.
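
To make the intended usage concrete (the helpers that actually operate
on the list only arrive in later patches of this series), a map whose
value carries such a list could be declared from a BPF program roughly
as below. This is just a sketch using the usual libbpf BTF-defined map
conventions from bpf_helpers.h; the map name and map type chosen here
are illustrative assumptions:

/* Sketch: an array map whose single element embeds the bpf_list_head
 * declared in struct map_value above.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct map_value);
} array_map SEC(".maps");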

For now, no elements can be added to such lists; support for that comes
in later patches. Hence, draining and freeing of items is also left out
for now, and only the list_head_off_tab itself is freed, since it is
still built and populated when bpf_list_head is specified in the map
value.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/bpf.h                           |  64 +++++--
 include/linux/btf.h                           |   2 +
 kernel/bpf/arraymap.c                         |   2 +
 kernel/bpf/bpf_local_storage.c                |   1 +
 kernel/bpf/btf.c                              | 173 +++++++++++++++++-
 kernel/bpf/hashtab.c                          |   1 +
 kernel/bpf/map_in_map.c                       |   5 +-
 kernel/bpf/syscall.c                          | 131 +++++++++++--
 kernel/bpf/verifier.c                         |  21 +++
 .../testing/selftests/bpf/bpf_experimental.h  |  21 +++
 10 files changed, 378 insertions(+), 43 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_experimental.h

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d4e6bf789c02..35c2e9caeb98 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -28,6 +28,9 @@
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
 
+/* Experimental BPF APIs header for type definitions */
+#include "../../../tools/testing/selftests/bpf/bpf_experimental.h"
+
 struct bpf_verifier_env;
 struct bpf_verifier_log;
 struct perf_event;
@@ -164,27 +167,40 @@ struct bpf_map_ops {
 };
 
 enum {
-	/* Support at most 8 pointers in a BPF map value */
-	BPF_MAP_VALUE_OFF_MAX = 8,
-	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
-				1 + /* for bpf_spin_lock */
-				1,  /* for bpf_timer */
-};
-
-enum bpf_kptr_type {
+	/* Support at most 8 offsets in a table */
+	BPF_MAP_VALUE_OFF_MAX		= 8,
+	/* Support at most 8 pointers in a BPF map value */
+	BPF_MAP_VALUE_KPTR_MAX		= BPF_MAP_VALUE_OFF_MAX,
+	/* Support at most 8 list_heads in a BPF map value */
+	BPF_MAP_VALUE_LIST_HEAD_MAX	= BPF_MAP_VALUE_OFF_MAX,
+	BPF_MAP_OFF_ARR_MAX		= BPF_MAP_VALUE_KPTR_MAX +
+					  BPF_MAP_VALUE_LIST_HEAD_MAX +
+					  1 + /* for bpf_spin_lock */
+					  1,  /* for bpf_timer */
+};
+
+enum bpf_off_type {
 	BPF_KPTR_UNREF,
 	BPF_KPTR_REF,
+	BPF_LIST_HEAD,
 };
 
 struct bpf_map_value_off_desc {
 	u32 offset;
-	enum bpf_kptr_type type;
-	struct {
-		struct btf *btf;
-		struct module *module;
-		btf_dtor_kfunc_t dtor;
-		u32 btf_id;
-	} kptr;
+	enum bpf_off_type type;
+	union {
+		struct {
+			struct btf *btf;
+			struct module *module;
+			btf_dtor_kfunc_t dtor;
+			u32 btf_id;
+		} kptr; /* for BPF_KPTR_{UNREF,REF} */
+		struct {
+			struct btf *btf;
+			u32 value_type_id;
+			u32 list_node_off;
+		} list_head; /* for BPF_LIST_HEAD */
+	};
 };
 
 struct bpf_map_value_off {
@@ -215,6 +231,7 @@ struct bpf_map {
 	u32 map_flags;
 	int spin_lock_off; /* >=0 valid offset, <0 error */
 	struct bpf_map_value_off *kptr_off_tab;
+	struct bpf_map_value_off *list_head_off_tab;
 	int timer_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
@@ -265,6 +282,11 @@ static inline bool map_value_has_kptrs(const struct bpf_map *map)
 	return !IS_ERR_OR_NULL(map->kptr_off_tab);
 }
 
+static inline bool map_value_has_list_heads(const struct bpf_map *map)
+{
+	return !IS_ERR_OR_NULL(map->list_head_off_tab);
+}
+
 static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 {
 	if (unlikely(map_value_has_spin_lock(map)))
@@ -278,6 +300,13 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 		for (i = 0; i < tab->nr_off; i++)
 			*(u64 *)(dst + tab->off[i].offset) = 0;
 	}
+	if (unlikely(map_value_has_list_heads(map))) {
+		struct bpf_map_value_off *tab = map->list_head_off_tab;
+		int i;
+
+		for (i = 0; i < tab->nr_off; i++)
+			memset(dst + tab->off[i].offset, 0, sizeof(struct bpf_list_head));
+	}
 }
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
@@ -1676,6 +1705,11 @@ struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
 bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
 void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);
 
+struct bpf_map_value_off_desc *bpf_map_list_head_off_contains(struct bpf_map *map, u32 offset);
+void bpf_map_free_list_head_off_tab(struct bpf_map *map);
+struct bpf_map_value_off *bpf_map_copy_list_head_off_tab(const struct bpf_map *map);
+bool bpf_map_equal_list_head_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
+
 struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8062f9da7c40..9b62b8b2117e 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -156,6 +156,8 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 int btf_find_timer(const struct btf *btf, const struct btf_type *t);
 struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 					  const struct btf_type *t);
+struct bpf_map_value_off *btf_parse_list_heads(struct btf *btf,
+					       const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
 const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 832b2659e96e..c7263ee3a35f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -423,6 +423,8 @@ static void array_map_free(struct bpf_map *map)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
+	bpf_map_free_list_head_off_tab(map);
+
 	if (map_value_has_kptrs(map)) {
 		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 			for (i = 0; i < array->map.max_entries; i++) {
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 58cb0c179097..b5ccd76026b6 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -616,6 +616,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
 		rcu_barrier();
 		bpf_map_free_kptr_off_tab(&smap->map);
 	}
+	bpf_map_free_list_head_off_tab(&smap->map);
 	kvfree(smap->buckets);
 	bpf_map_area_free(smap);
 }
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6740c3ade8f1..0fb045be3837 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3185,6 +3185,7 @@ enum btf_field_type {
 	BTF_FIELD_SPIN_LOCK,
 	BTF_FIELD_TIMER,
 	BTF_FIELD_KPTR,
+	BTF_FIELD_LIST_HEAD,
 };
 
 enum {
@@ -3193,9 +3194,17 @@ enum {
 };
 
 struct btf_field_info {
-	u32 type_id;
 	u32 off;
-	enum bpf_kptr_type type;
+	union {
+		struct {
+			u32 type_id;
+			enum bpf_off_type type;
+		} kptr;
+		struct {
+			u32 value_type_id;
+			const char *node_name;
+		} list_head;
+	};
 };
 
 static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
@@ -3212,7 +3221,7 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
 static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 			 u32 off, int sz, struct btf_field_info *info)
 {
-	enum bpf_kptr_type type;
+	enum bpf_off_type type;
 	u32 res_id;
 
 	/* Permit modifiers on the pointer itself */
@@ -3241,9 +3250,71 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 	if (!__btf_type_is_struct(t))
 		return -EINVAL;
 
-	info->type_id = res_id;
 	info->off = off;
-	info->type = type;
+	info->kptr.type_id = res_id;
+	info->kptr.type = type;
+	return BTF_FIELD_FOUND;
+}
+
+static const char *btf_find_decl_tag_value(const struct btf *btf,
+					   const struct btf_type *pt,
+					   int comp_idx, const char *tag_key)
+{
+	int i;
+
+	for (i = 1; i < btf_nr_types(btf); i++) {
+		const struct btf_type *t = btf_type_by_id(btf, i);
+		int len = strlen(tag_key);
+
+		if (!btf_type_is_decl_tag(t))
+			continue;
+		/* TODO: Instead of btf_type pt, it would be much better if we had BTF
+		 * ID of the map value type. This would avoid btf_type_by_id call here.
+		 */
+		if (pt != btf_type_by_id(btf, t->type) ||
+		    btf_type_decl_tag(t)->component_idx != comp_idx)
+			continue;
+		if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
+			continue;
+		return __btf_name_by_offset(btf, t->name_off) + len;
+	}
+	return NULL;
+}
+
+static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
+			      int comp_idx, const struct btf_type *t,
+			      u32 off, int sz, struct btf_field_info *info)
+{
+	const char *value_type;
+	const char *list_node;
+	s32 id;
+
+	if (!__btf_type_is_struct(t))
+		return BTF_FIELD_IGNORE;
+	if (t->size != sz)
+		return BTF_FIELD_IGNORE;
+	value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
+	if (!value_type)
+		return -EINVAL;
+	if (strncmp(value_type, "struct:", sizeof("struct:") - 1))
+		return -EINVAL;
+	value_type += sizeof("struct:") - 1;
+	list_node = strstr(value_type, ":");
+	if (!list_node)
+		return -EINVAL;
+	value_type = kstrndup(value_type, list_node - value_type, GFP_ATOMIC);
+	if (!value_type)
+		return -ENOMEM;
+	id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
+	kfree(value_type);
+	if (id < 0)
+		return id;
+	list_node++;
+	if (str_is_empty(list_node))
+		return -EINVAL;
+	info->off = off;
+	info->list_head.value_type_id = id;
+	info->list_head.node_name = list_node;
 	return BTF_FIELD_FOUND;
 }
 
@@ -3286,6 +3357,12 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
 			if (ret < 0)
 				return ret;
 			break;
+		case BTF_FIELD_LIST_HEAD:
+			ret = btf_find_list_head(btf, t, i, member_type, off, sz,
+						 idx < info_cnt ? &info[idx] : &tmp);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return -EFAULT;
 		}
@@ -3336,6 +3413,12 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 			if (ret < 0)
 				return ret;
 			break;
+		case BTF_FIELD_LIST_HEAD:
+			ret = btf_find_list_head(btf, var, -1, var_type, off, sz,
+						 idx < info_cnt ? &info[idx] : &tmp);
+			if (ret < 0)
+				return ret;
+			break;
 		default:
 			return -EFAULT;
 		}
@@ -3372,6 +3455,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
 		sz = sizeof(u64);
 		align = 8;
 		break;
+	case BTF_FIELD_LIST_HEAD:
+		name = "bpf_list_head";
+		sz = sizeof(struct bpf_list_head);
+		align = __alignof__(struct bpf_list_head);
+		break;
 	default:
 		return -EFAULT;
 	}
@@ -3440,7 +3528,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 		/* Find type in map BTF, and use it to look up the matching type
 		 * in vmlinux or module BTFs, by name and kind.
 		 */
-		t = btf_type_by_id(btf, info_arr[i].type_id);
+		t = btf_type_by_id(btf, info_arr[i].kptr.type_id);
 		id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
 				     &kernel_btf);
 		if (id < 0) {
@@ -3451,7 +3539,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 		/* Find and stash the function pointer for the destruction function that
 		 * needs to be eventually invoked from the map free path.
 		 */
-		if (info_arr[i].type == BPF_KPTR_REF) {
+		if (info_arr[i].kptr.type == BPF_KPTR_REF) {
 			const struct btf_type *dtor_func;
 			const char *dtor_func_name;
 			unsigned long addr;
@@ -3494,7 +3582,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 		}
 
 		tab->off[i].offset = info_arr[i].off;
-		tab->off[i].type = info_arr[i].type;
+		tab->off[i].type = info_arr[i].kptr.type;
 		tab->off[i].kptr.btf_id = id;
 		tab->off[i].kptr.btf = kernel_btf;
 		tab->off[i].kptr.module = mod;
@@ -3515,6 +3603,75 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 	return ERR_PTR(ret);
 }
 
+struct bpf_map_value_off *btf_parse_list_heads(struct btf *btf, const struct btf_type *t)
+{
+	struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX];
+	struct bpf_map_value_off *tab;
+	int ret, i, nr_off;
+
+	ret = btf_find_field(btf, t, BTF_FIELD_LIST_HEAD, info_arr, ARRAY_SIZE(info_arr));
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (!ret)
+		return NULL;
+
+	nr_off = ret;
+	tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN);
+	if (!tab)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_off; i++) {
+		const struct btf_type *t, *n = NULL;
+		const struct btf_member *member;
+		u32 offset;
+		int j;
+
+		t = btf_type_by_id(btf, info_arr[i].list_head.value_type_id);
+		/* We've already checked that value_type_id is a struct type. We
+		 * just need to figure out the offset of the list_node, and
+		 * verify its type.
+		 */
+		ret = -EINVAL;
+		for_each_member(j, t, member) {
+			if (strcmp(info_arr[i].list_head.node_name, __btf_name_by_offset(btf, member->name_off)))
+				continue;
+			/* Invalid BTF, two members with same name */
+			if (n) {
+				/* We also need to btf_put for the current iteration! */
+				i++;
+				goto end;
+			}
+			n = btf_type_by_id(btf, member->type);
+			if (!__btf_type_is_struct(n))
+				goto end;
+			if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+				goto end;
+			offset = __btf_member_bit_offset(t, member);
+			if (offset % 8)
+				goto end;
+			offset /= 8;
+			if (offset % __alignof__(struct bpf_list_node))
+				goto end;
+
+			tab->off[i].offset = info_arr[i].off;
+			tab->off[i].type = BPF_LIST_HEAD;
+			btf_get(btf);
+			tab->off[i].list_head.btf = btf;
+			tab->off[i].list_head.value_type_id = info_arr[i].list_head.value_type_id;
+			tab->off[i].list_head.list_node_off = offset;
+		}
+		if (!n)
+			goto end;
+	}
+	tab->nr_off = nr_off;
+	return tab;
+end:
+	while (i--)
+		btf_put(tab->off[i].list_head.btf);
+	kfree(tab);
+	return ERR_PTR(ret);
+}
+
 static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
 			      u32 type_id, void *data, u8 bits_offset,
 			      struct btf_show *show)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index bb3f8a63c221..270e0ecf4ba3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1518,6 +1518,7 @@ static void htab_map_free(struct bpf_map *map)
 		prealloc_destroy(htab);
 	}
 
+	bpf_map_free_list_head_off_tab(map);
 	bpf_map_free_kptr_off_tab(map);
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 135205d0d560..ced2559129ab 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -53,6 +53,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
 	inner_map_meta->timer_off = inner_map->timer_off;
 	inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map);
+	inner_map_meta->list_head_off_tab = bpf_map_copy_list_head_off_tab(inner_map);
 	if (inner_map->btf) {
 		btf_get(inner_map->btf);
 		inner_map_meta->btf = inner_map->btf;
@@ -72,6 +73,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 
 void bpf_map_meta_free(struct bpf_map *map_meta)
 {
+	bpf_map_free_list_head_off_tab(map_meta);
 	bpf_map_free_kptr_off_tab(map_meta);
 	btf_put(map_meta->btf);
 	kfree(map_meta);
@@ -86,7 +88,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 		meta0->value_size == meta1->value_size &&
 		meta0->timer_off == meta1->timer_off &&
 		meta0->map_flags == meta1->map_flags &&
-		bpf_map_equal_kptr_off_tab(meta0, meta1);
+		bpf_map_equal_kptr_off_tab(meta0, meta1) &&
+		bpf_map_equal_list_head_off_tab(meta0, meta1);
 }
 
 void *bpf_map_fd_get_ptr(struct bpf_map *map,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0311acca19f6..e1749e0d2143 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -495,7 +495,7 @@ static void bpf_map_release_memcg(struct bpf_map *map)
 }
 #endif
 
-static int bpf_map_kptr_off_cmp(const void *a, const void *b)
+static int bpf_map_off_cmp(const void *a, const void *b)
 {
 	const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
 
@@ -506,18 +506,22 @@ static int bpf_map_kptr_off_cmp(const void *a, const void *b)
 	return 0;
 }
 
-struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
+static struct bpf_map_value_off_desc *
+__bpf_map_off_contains(struct bpf_map_value_off *off_tab, u32 offset)
 {
 	/* Since members are iterated in btf_find_field in increasing order,
-	 * offsets appended to kptr_off_tab are in increasing order, so we can
+	 * offsets appended to an off_tab are in increasing order, so we can
 	 * do bsearch to find exact match.
 	 */
-	struct bpf_map_value_off *tab;
+	return bsearch(&offset, off_tab->off, off_tab->nr_off, sizeof(off_tab->off[0]),
+		       bpf_map_off_cmp);
+}
 
+struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
+{
 	if (!map_value_has_kptrs(map))
 		return NULL;
-	tab = map->kptr_off_tab;
-	return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp);
+	return __bpf_map_off_contains(map->kptr_off_tab, offset);
 }
 
 void bpf_map_free_kptr_off_tab(struct bpf_map *map)
@@ -563,15 +567,15 @@ struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
 	return new_tab;
 }
 
-bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+static bool __bpf_map_equal_off_tab(const struct bpf_map_value_off *tab_a,
+				    const struct bpf_map_value_off *tab_b,
+				    bool has_a, bool has_b)
 {
-	struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
-	bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
 	int size;
 
-	if (!a_has_kptr && !b_has_kptr)
+	if (!has_a && !has_b)
 		return true;
-	if (a_has_kptr != b_has_kptr)
+	if (has_a != has_b)
 		return false;
 	if (tab_a->nr_off != tab_b->nr_off)
 		return false;
@@ -579,6 +583,13 @@ bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_ma
 	return !memcmp(tab_a, tab_b, size);
 }
 
+bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+{
+	return __bpf_map_equal_off_tab(map_a->kptr_off_tab, map_b->kptr_off_tab,
+				       map_value_has_kptrs(map_a),
+				       map_value_has_kptrs(map_b));
+}
+
 /* Caller must ensure map_value_has_kptrs is true. Note that this function can
  * be called on a map value while the map_value is visible to BPF programs, as
  * it ensures the correct synchronization, and we already enforce the same using
@@ -606,6 +617,50 @@ void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
 	}
 }
 
+struct bpf_map_value_off_desc *bpf_map_list_head_off_contains(struct bpf_map *map, u32 offset)
+{
+	if (!map_value_has_list_heads(map))
+		return NULL;
+	return __bpf_map_off_contains(map->list_head_off_tab, offset);
+}
+
+void bpf_map_free_list_head_off_tab(struct bpf_map *map)
+{
+	struct bpf_map_value_off *tab = map->list_head_off_tab;
+	int i;
+
+	if (!map_value_has_list_heads(map))
+		return;
+	for (i = 0; i < tab->nr_off; i++)
+		btf_put(tab->off[i].list_head.btf);
+	kfree(tab);
+	map->list_head_off_tab = NULL;
+}
+
+struct bpf_map_value_off *bpf_map_copy_list_head_off_tab(const struct bpf_map *map)
+{
+	struct bpf_map_value_off *tab = map->list_head_off_tab, *new_tab;
+	int size, i;
+
+	if (!map_value_has_list_heads(map))
+		return ERR_PTR(-ENOENT);
+	size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
+	new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
+	if (!new_tab)
+		return ERR_PTR(-ENOMEM);
+	/* Do a deep copy of the list_head_off_tab */
+	for (i = 0; i < tab->nr_off; i++)
+		btf_get(tab->off[i].list_head.btf);
+	return new_tab;
+}
+
+bool bpf_map_equal_list_head_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
+{
+	return __bpf_map_equal_off_tab(map_a->list_head_off_tab, map_b->list_head_off_tab,
+				       map_value_has_list_heads(map_a),
+				       map_value_has_list_heads(map_b));
+}
+
 /* called from workqueue */
 static void bpf_map_free_deferred(struct work_struct *work)
 {
@@ -776,7 +831,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 	int err;
 
 	if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
-	    map_value_has_timer(map) || map_value_has_kptrs(map))
+	    map_value_has_timer(map) || map_value_has_kptrs(map) ||
+	    map_value_has_list_heads(map))
 		return -ENOTSUPP;
 
 	if (!(vma->vm_flags & VM_SHARED))
@@ -931,13 +987,14 @@ static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
 
 static int bpf_map_alloc_off_arr(struct bpf_map *map)
 {
+	bool has_list_heads = map_value_has_list_heads(map);
 	bool has_spin_lock = map_value_has_spin_lock(map);
 	bool has_timer = map_value_has_timer(map);
 	bool has_kptrs = map_value_has_kptrs(map);
 	struct bpf_map_off_arr *off_arr;
 	u32 i;
 
-	if (!has_spin_lock && !has_timer && !has_kptrs) {
+	if (!has_spin_lock && !has_timer && !has_kptrs && !has_list_heads) {
 		map->off_arr = NULL;
 		return 0;
 	}
@@ -973,6 +1030,17 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
 		}
 		off_arr->cnt += tab->nr_off;
 	}
+	if (has_list_heads) {
+		struct bpf_map_value_off *tab = map->list_head_off_tab;
+		u32 *off = &off_arr->field_off[off_arr->cnt];
+		u8 *sz = &off_arr->field_sz[off_arr->cnt];
+
+		for (i = 0; i < tab->nr_off; i++) {
+			*off++ = tab->off[i].offset;
+			*sz++ = sizeof(struct bpf_list_head);
+		}
+		off_arr->cnt += tab->nr_off;
+	}
 
 	if (off_arr->cnt == 1)
 		return 0;
@@ -1038,11 +1106,11 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	if (map_value_has_kptrs(map)) {
 		if (!bpf_capable()) {
 			ret = -EPERM;
-			goto free_map_tab;
+			goto free_map_kptr_tab;
 		}
 		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
 			ret = -EACCES;
-			goto free_map_tab;
+			goto free_map_kptr_tab;
 		}
 		if (map->map_type != BPF_MAP_TYPE_HASH &&
 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
@@ -1054,18 +1122,42 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
 			ret = -EOPNOTSUPP;
-			goto free_map_tab;
+			goto free_map_kptr_tab;
+		}
+	}
+
+	/* We need to take ref on the BTF, so pass it as non-const */
+	map->list_head_off_tab = btf_parse_list_heads((struct btf *)btf, value_type);
+	if (map_value_has_list_heads(map)) {
+		if (!bpf_capable()) {
+			ret = -EACCES;
+			goto free_map_list_head_tab;
+		}
+		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
+			ret = -EACCES;
+			goto free_map_list_head_tab;
+		}
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY &&
+		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
+			ret = -EOPNOTSUPP;
+			goto free_map_list_head_tab;
 		}
 	}
 
 	if (map->ops->map_check_btf) {
 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 		if (ret < 0)
-			goto free_map_tab;
+			goto free_map_list_head_tab;
 	}
 
 	return ret;
-free_map_tab:
+free_map_list_head_tab:
+	bpf_map_free_list_head_off_tab(map);
+free_map_kptr_tab:
 	bpf_map_free_kptr_off_tab(map);
 	return ret;
 }
@@ -1889,7 +1981,8 @@ static int map_freeze(const union bpf_attr *attr)
 		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
-	    map_value_has_timer(map) || map_value_has_kptrs(map)) {
+	    map_value_has_timer(map) || map_value_has_kptrs(map) ||
+	    map_value_has_list_heads(map)) {
 		fdput(f);
 		return -ENOTSUPP;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 571790ac58d4..ab91e5ca7e41 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3879,6 +3879,20 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 			}
 		}
 	}
+	if (map_value_has_list_heads(map)) {
+		struct bpf_map_value_off *tab = map->list_head_off_tab;
+		int i;
+
+		for (i = 0; i < tab->nr_off; i++) {
+			u32 p = tab->off[i].offset;
+
+			if (reg->smin_value + off < p + sizeof(struct bpf_list_head) &&
+			    p < reg->umax_value + off + size) {
+				verbose(env, "bpf_list_head cannot be accessed directly by load/store\n");
+				return -EACCES;
+			}
+		}
+	}
 	return err;
 }
 
@@ -13165,6 +13179,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		}
 	}
 
+	if (map_value_has_list_heads(map)) {
+		if (is_tracing_prog_type(prog_type)) {
+			verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+			return -EINVAL;
+		}
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
new file mode 100644
index 000000000000..ea1b3b1839d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -0,0 +1,21 @@
+#ifndef __KERNEL__
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#else
+
+struct bpf_list_head {
+	__u64 __a;
+	__u64 __b;
+} __attribute__((aligned(8)));
+
+struct bpf_list_node {
+	__u64 __a;
+	__u64 __b;
+} __attribute__((aligned(8)));
+
+#endif
+
+#ifndef __KERNEL__
+#endif
-- 
2.34.1

