bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next 0/3] bpf: Allow inner map with different max_entries
@ 2020-05-22  2:23 Martin KaFai Lau
  2020-05-22  2:23 ` [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h Martin KaFai Lau
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-22  2:23 UTC (permalink / raw)
  To: bpf; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev

People has a use case that starts with a smaller inner map first and then
replaces it with a larger inner map later when it is needed.

This series allows the outer map to be updated with inner map in different
size as long as it is safe (meaning the max_entries is not used in the
verification time during prog load).

Please see individual patch for details.

v2:
- New BPF_MAP_TYPE_FL to minimize code churns (Alexei)
- s/capabilities/properties/ (Andrii)
- Describe WHY in commit log (Andrii)

Martin KaFai Lau (3):
  bpf: Consolidate inner-map-compatible properties into bpf_types.h
  bpf: Relax the max_entries check for inner map
  bpf: selftests: Add test for different inner map size

 include/linux/bpf.h                           | 20 ++++++++++--
 include/linux/bpf_types.h                     | 25 +++++++++++----
 kernel/bpf/btf.c                              |  4 +--
 kernel/bpf/map_in_map.c                       | 12 +++----
 kernel/bpf/syscall.c                          | 25 +++++++++++----
 kernel/bpf/verifier.c                         |  4 +--
 .../selftests/bpf/prog_tests/btf_map_in_map.c | 12 +++++++
 .../selftests/bpf/progs/test_btf_map_in_map.c | 31 +++++++++++++++++++
 8 files changed, 107 insertions(+), 26 deletions(-)

-- 
2.24.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-22  2:23 [PATCH v2 bpf-next 0/3] bpf: Allow inner map with different max_entries Martin KaFai Lau
@ 2020-05-22  2:23 ` Martin KaFai Lau
  2020-05-22 22:22   ` Daniel Borkmann
  2020-05-22  2:23 ` [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map Martin KaFai Lau
  2020-05-22  2:23 ` [PATCH v2 bpf-next 3/3] bpf: selftests: Add test for different inner map size Martin KaFai Lau
  2 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-22  2:23 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev, Andrey Ignatov

This patch adds a "properties" member to "struct bpf_map".  The properties of
individual map-type is tagged in the "BPF_MAP_TYPE_FL" in bpf_types.h.
The original "BPF_MAP_TYPE" is defined to "BPF_MAP_TYPE_FL(..., 0)".

It will be less error prone when a map's properties is decided at the same
place as the new map type is added in bpf_types.h.  That will help to
avoid mistake like missing modification in other source files like
the map_in_map.c here or other source files in the future.

Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h       |  8 ++++++--
 include/linux/bpf_types.h | 19 +++++++++++++++----
 kernel/bpf/btf.c          |  4 ++--
 kernel/bpf/map_in_map.c   |  9 ++-------
 kernel/bpf/syscall.c      | 25 +++++++++++++++++++------
 kernel/bpf/verifier.c     |  4 ++--
 6 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index efe8836b5c48..f947d899aa46 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -97,6 +97,9 @@ struct bpf_map_memory {
 	struct user_struct *user;
 };
 
+/* Cannot be used as an inner map */
+#define BPF_MAP_NO_INNER_MAP (1 << 0)
+
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -120,6 +123,7 @@ struct bpf_map {
 	struct bpf_map_memory memory;
 	char name[BPF_OBJ_NAME_LEN];
 	u32 btf_vmlinux_value_type_id;
+	u32 properties;
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 	/* 22 bytes hole */
@@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops;
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	extern const struct bpf_prog_ops _name ## _prog_ops; \
 	extern const struct bpf_verifier_ops _name ## _verifier_ops;
-#define BPF_MAP_TYPE(_id, _ops) \
+#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
 	extern const struct bpf_map_ops _ops;
 #define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 
 extern const struct bpf_prog_ops bpf_offload_prog_ops;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 29d22752fc87..3f32702c9bf4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 #endif /* CONFIG_BPF_LSM */
 #endif
 
+#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
+
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
+/* prog_array->aux->{type,jited} is a runtime binding.
+ * Doing static check alone in the verifier is not enough,
+ * so BPF_MAP_NO_INNTER_MAP is needed.
+ */
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops,
+		BPF_MAP_NO_INNER_MAP)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
 #ifdef CONFIG_CGROUPS
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
 #ifdef CONFIG_CGROUP_BPF
-BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops,
+		BPF_MAP_NO_INNER_MAP)
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops,
+		BPF_MAP_NO_INNER_MAP)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
@@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
 #if defined(CONFIG_BPF_JIT)
-BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops,
+		BPF_MAP_NO_INNER_MAP)
 #endif
+#undef BPF_MAP_TYPE
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 58c9af1d4808..f527a120eef3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3481,7 +3481,7 @@ extern char __weak __start_BTF[];
 extern char __weak __stop_BTF[];
 extern struct btf *btf_vmlinux;
 
-#define BPF_MAP_TYPE(_id, _ops)
+#define BPF_MAP_TYPE_FL(_id, _ops, properties)
 #define BPF_LINK_TYPE(_id, _name)
 static union {
 	struct bpf_ctx_convert {
@@ -3508,7 +3508,7 @@ static u8 bpf_ctx_convert_map[] = {
 #undef BPF_PROG_TYPE
 	0, /* avoid empty array */
 };
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 
 static const struct btf_member *
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 17738c93bec8..d965a1d328a9 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	if (IS_ERR(inner_map))
 		return inner_map;
 
-	/* prog_array->aux->{type,jited} is a runtime binding.
-	 * Doing static check alone in the verifier is not enough.
-	 */
-	if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
-	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
-	    inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
-	    inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+	if (inner_map->properties & BPF_MAP_NO_INNER_MAP) {
 		fdput(f);
 		return ERR_PTR(-ENOTSUPP);
 	}
@@ -56,6 +50,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->map_flags = inner_map->map_flags;
 	inner_map_meta->max_entries = inner_map->max_entries;
 	inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
+	inner_map_meta->properties = inner_map->properties;
 
 	/* Misc members not needed in bpf_map_meta_equal() check. */
 	inner_map_meta->ops = inner_map->ops;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 431241c74614..02ec14e3e19a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -49,12 +49,23 @@ int sysctl_unprivileged_bpf_disabled __read_mostly;
 
 static const struct bpf_map_ops * const bpf_map_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
-#define BPF_MAP_TYPE(_id, _ops) \
+#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
 	[_id] = &_ops,
 #define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
+#undef BPF_LINK_TYPE
+};
+
+static const u32 bpf_map_properties[] = {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
+#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
+	[_id] = properties,
+#define BPF_LINK_TYPE(_id, _name)
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 };
 
@@ -131,6 +142,8 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 		return map;
 	map->ops = ops;
 	map->map_type = type;
+	map->properties = bpf_map_properties[type];
+
 	return map;
 }
 
@@ -1551,11 +1564,11 @@ static int map_freeze(const union bpf_attr *attr)
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	[_id] = & _name ## _prog_ops,
-#define BPF_MAP_TYPE(_id, _ops)
+#define BPF_MAP_TYPE_FL(_id, _ops, properties)
 #define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 };
 
@@ -2333,14 +2346,14 @@ static int bpf_link_release(struct inode *inode, struct file *filp)
 
 #ifdef CONFIG_PROC_FS
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
-#define BPF_MAP_TYPE(_id, _ops)
+#define BPF_MAP_TYPE_FL(_id, _ops, properties)
 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
 static const char *bpf_link_type_strs[] = {
 	[BPF_LINK_TYPE_UNSPEC] = "<invalid>",
 #include <linux/bpf_types.h>
 };
 #undef BPF_PROG_TYPE
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 
 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2ed8351f47a4..1743e070d08f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -27,11 +27,11 @@
 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	[_id] = & _name ## _verifier_ops,
-#define BPF_MAP_TYPE(_id, _ops)
+#define BPF_MAP_TYPE_FL(_id, _ops, properties)
 #define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
-#undef BPF_MAP_TYPE
+#undef BPF_MAP_TYPE_FL
 #undef BPF_LINK_TYPE
 };
 
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map
  2020-05-22  2:23 [PATCH v2 bpf-next 0/3] bpf: Allow inner map with different max_entries Martin KaFai Lau
  2020-05-22  2:23 ` [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h Martin KaFai Lau
@ 2020-05-22  2:23 ` Martin KaFai Lau
  2020-05-22 22:26   ` Daniel Borkmann
  2020-05-22  2:23 ` [PATCH v2 bpf-next 3/3] bpf: selftests: Add test for different inner map size Martin KaFai Lau
  2 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-22  2:23 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev, Andrey Ignatov

This patch relaxes the max_entries check for most of the inner map types
during an update to the outer map.  The max_entries of those map types
are only used in runtime.  By doing this, an inner map with different
size can be updated to the outer map in runtime.  People has a use
case that starts with a smaller inner map first and then replaces
it with a larger inner map later when it is needed.

The max_entries of arraymap and xskmap are used statically
in verification time to generate the inline code, so they
are excluded in this patch.

Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h       | 12 ++++++++++++
 include/linux/bpf_types.h |  6 ++++--
 kernel/bpf/map_in_map.c   |  3 ++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f947d899aa46..1839ef9aca02 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -99,6 +99,18 @@ struct bpf_map_memory {
 
 /* Cannot be used as an inner map */
 #define BPF_MAP_NO_INNER_MAP (1 << 0)
+/* When a prog has used map-in-map, the verifier requires
+ * an inner-map as a template to verify the access operations
+ * on the outer and inner map.  For some inner map-types,
+ * the verifier uses the inner_map's max_entries statically
+ * (e.g. to generate inline code).  If this verification
+ * time usage on max_entries applies to an inner map-type,
+ * during runtime, only the inner map with the same
+ * max_entries can be updated to this outer map.
+ *
+ * Please see bpf_map_meta_equal() for details.
+ */
+#define BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE (1 << 1)
 
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 3f32702c9bf4..85861722b7f3 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -78,7 +78,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 
 #define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
 
-BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_ARRAY, array_map_ops,
+		BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
 /* prog_array->aux->{type,jited} is a runtime binding.
  * Doing static check alone in the verifier is not enough,
@@ -116,7 +117,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
-BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
+BPF_MAP_TYPE_FL(BPF_MAP_TYPE_XSKMAP, xsk_map_ops,
+		BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE)
 #endif
 #ifdef CONFIG_INET
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index d965a1d328a9..b296fe8af1ad 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -77,7 +77,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 		meta0->key_size == meta1->key_size &&
 		meta0->value_size == meta1->value_size &&
 		meta0->map_flags == meta1->map_flags &&
-		meta0->max_entries == meta1->max_entries;
+		(meta0->max_entries == meta1->max_entries ||
+		 !(meta0->properties & BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE));
 }
 
 void *bpf_map_fd_get_ptr(struct bpf_map *map,
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 bpf-next 3/3] bpf: selftests: Add test for different inner map size
  2020-05-22  2:23 [PATCH v2 bpf-next 0/3] bpf: Allow inner map with different max_entries Martin KaFai Lau
  2020-05-22  2:23 ` [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h Martin KaFai Lau
  2020-05-22  2:23 ` [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map Martin KaFai Lau
@ 2020-05-22  2:23 ` Martin KaFai Lau
  2 siblings, 0 replies; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-22  2:23 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev, Andrey Ignatov

This patch tests the inner map size can be different
for reuseport_sockarray but has to be the same for
arraymap.

Cc: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 .../selftests/bpf/prog_tests/btf_map_in_map.c | 12 +++++++
 .../selftests/bpf/progs/test_btf_map_in_map.c | 31 +++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index f7ee8fa377ad..7ae767d15608 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -44,6 +44,18 @@ void test_btf_map_in_map(void)
 	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
 	CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
 
+	val = bpf_map__fd(skel->maps.sockarr_sz2);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr_sz1),
+				  &key, &val, 0);
+	CHECK(err, "outer_sockarr inner map size check",
+	      "cannot use an inner_map with different size\n");
+
+	val = bpf_map__fd(skel->maps.inner_map_sz2);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key,
+				  &val, 0);
+	CHECK(!err, "outer_arr inner map size check",
+	      "incorrectly updated with an inner_map in different size\n");
+
 cleanup:
 	test_btf_map_in_map__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index e5093796be97..0b8e04a817f6 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -11,6 +11,13 @@ struct inner_map {
 } inner_map1 SEC(".maps"),
   inner_map2 SEC(".maps");
 
+struct inner_map_sz2 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, int);
+} inner_map_sz2 SEC(".maps");
+
 struct outer_arr {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
@@ -50,6 +57,30 @@ struct outer_hash {
 	},
 };
 
+struct sockarr_sz1 {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct sockarr_sz1);
+} outer_sockarr_sz1 SEC(".maps") = {
+	.values = { (void *)&sockarr_sz1 },
+};
+
 int input = 0;
 
 SEC("raw_tp/sys_enter")
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-22  2:23 ` [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h Martin KaFai Lau
@ 2020-05-22 22:22   ` Daniel Borkmann
  2020-05-23  1:00     ` Martin KaFai Lau
  0 siblings, 1 reply; 10+ messages in thread
From: Daniel Borkmann @ 2020-05-22 22:22 UTC (permalink / raw)
  To: Martin KaFai Lau, bpf
  Cc: Alexei Starovoitov, kernel-team, netdev, Andrey Ignatov

On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
[...]
>   };
>   
> +/* Cannot be used as an inner map */
> +#define BPF_MAP_NO_INNER_MAP (1 << 0)
> +
>   struct bpf_map {
>   	/* The first two cachelines with read-mostly members of which some
>   	 * are also accessed in fast-path (e.g. ops, max_entries).
> @@ -120,6 +123,7 @@ struct bpf_map {
>   	struct bpf_map_memory memory;
>   	char name[BPF_OBJ_NAME_LEN];
>   	u32 btf_vmlinux_value_type_id;
> +	u32 properties;
>   	bool bypass_spec_v1;
>   	bool frozen; /* write-once; write-protected by freeze_mutex */
>   	/* 22 bytes hole */
> @@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops;
>   #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
>   	extern const struct bpf_prog_ops _name ## _prog_ops; \
>   	extern const struct bpf_verifier_ops _name ## _verifier_ops;
> -#define BPF_MAP_TYPE(_id, _ops) \
> +#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
>   	extern const struct bpf_map_ops _ops;
>   #define BPF_LINK_TYPE(_id, _name)
>   #include <linux/bpf_types.h>
>   #undef BPF_PROG_TYPE
> -#undef BPF_MAP_TYPE
> +#undef BPF_MAP_TYPE_FL
>   #undef BPF_LINK_TYPE
>   
>   extern const struct bpf_prog_ops bpf_offload_prog_ops;
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 29d22752fc87..3f32702c9bf4 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
>   #endif /* CONFIG_BPF_LSM */
>   #endif
>   
> +#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
> +
>   BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
> +/* prog_array->aux->{type,jited} is a runtime binding.
> + * Doing static check alone in the verifier is not enough,
> + * so BPF_MAP_NO_INNTER_MAP is needed.

typo: INNTER

> + */
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops,
> +		BPF_MAP_NO_INNER_MAP)

Probably nit, but what is "FL"? flags? We do have map_flags already, but here the
BPF_MAP_NO_INNER_MAP ends up in 'properties' instead. To avoid confusion, it would
probably be better to name it 'map_flags_fixed' since this is what it really means;
fixed flags that cannot be changed/controlled when creating a map.

>   BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
>   #ifdef CONFIG_CGROUPS
>   BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
>   #endif
>   #ifdef CONFIG_CGROUP_BPF
> -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
> -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops,
> +		BPF_MAP_NO_INNER_MAP)
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops,
> +		BPF_MAP_NO_INNER_MAP)
>   #endif
>   BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
> @@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
>   #if defined(CONFIG_BPF_JIT)
> -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops,
> +		BPF_MAP_NO_INNER_MAP)
>   #endif
> +#undef BPF_MAP_TYPE
>   
>   BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
>   BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
[...]
> diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
> index 17738c93bec8..d965a1d328a9 100644
> --- a/kernel/bpf/map_in_map.c
> +++ b/kernel/bpf/map_in_map.c
> @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
>   	if (IS_ERR(inner_map))
>   		return inner_map;
>   
> -	/* prog_array->aux->{type,jited} is a runtime binding.
> -	 * Doing static check alone in the verifier is not enough.
> -	 */
> -	if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
> -	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
> -	    inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
> -	    inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
> +	if (inner_map->properties & BPF_MAP_NO_INNER_MAP) {
>   		fdput(f);
>   		return ERR_PTR(-ENOTSUPP);
>   	}

This whole check here is currently very fragile. For example, given we forbid cgroup
local storage here, why do we not forbid socket local storage? What about other maps
like stackmap? It's quite unclear if it even works as expected and if there's also a
use-case we are aware of. Why not making this an explicit opt-in?

Like explicit annotating via struct bpf_map_ops where everything is visible in one
single place where the map is defined:

const struct bpf_map_ops array_map_ops = {
         .map_alloc_check = array_map_alloc_check,
         [...]
         .map_flags_fixed = BPF_MAP_IN_MAP_OK,
};

That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since
it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed
to the other way round where one can easily miss the opt-out case and potentially crash
the machine worst case.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map
  2020-05-22  2:23 ` [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map Martin KaFai Lau
@ 2020-05-22 22:26   ` Daniel Borkmann
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Borkmann @ 2020-05-22 22:26 UTC (permalink / raw)
  To: Martin KaFai Lau, bpf
  Cc: Alexei Starovoitov, kernel-team, netdev, Andrey Ignatov

On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
> This patch relaxes the max_entries check for most of the inner map types
> during an update to the outer map.  The max_entries of those map types
> are only used in runtime.  By doing this, an inner map with different
> size can be updated to the outer map in runtime.  People has a use
> case that starts with a smaller inner map first and then replaces
> it with a larger inner map later when it is needed.
> 
> The max_entries of arraymap and xskmap are used statically
> in verification time to generate the inline code, so they
> are excluded in this patch.
> 
> Cc: Andrey Ignatov <rdna@fb.com>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
>   include/linux/bpf.h       | 12 ++++++++++++
>   include/linux/bpf_types.h |  6 ++++--
>   kernel/bpf/map_in_map.c   |  3 ++-
>   3 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f947d899aa46..1839ef9aca02 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -99,6 +99,18 @@ struct bpf_map_memory {
>   
>   /* Cannot be used as an inner map */
>   #define BPF_MAP_NO_INNER_MAP (1 << 0)
> +/* When a prog has used map-in-map, the verifier requires
> + * an inner-map as a template to verify the access operations
> + * on the outer and inner map.  For some inner map-types,
> + * the verifier uses the inner_map's max_entries statically
> + * (e.g. to generate inline code).  If this verification
> + * time usage on max_entries applies to an inner map-type,
> + * during runtime, only the inner map with the same
> + * max_entries can be updated to this outer map.
> + *
> + * Please see bpf_map_meta_equal() for details.
> + */
> +#define BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE (1 << 1)
>   
>   struct bpf_map {
>   	/* The first two cachelines with read-mostly members of which some
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 3f32702c9bf4..85861722b7f3 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -78,7 +78,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
>   
>   #define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
>   
> -BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_ARRAY, array_map_ops,
> +		BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
>   /* prog_array->aux->{type,jited} is a runtime binding.
>    * Doing static check alone in the verifier is not enough,
> @@ -116,7 +117,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
>   #endif
>   BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
>   #if defined(CONFIG_XDP_SOCKETS)
> -BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
> +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_XSKMAP, xsk_map_ops,
> +		BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE)
>   #endif
>   #ifdef CONFIG_INET
>   BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
> diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
> index d965a1d328a9..b296fe8af1ad 100644
> --- a/kernel/bpf/map_in_map.c
> +++ b/kernel/bpf/map_in_map.c
> @@ -77,7 +77,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
>   		meta0->key_size == meta1->key_size &&
>   		meta0->value_size == meta1->value_size &&
>   		meta0->map_flags == meta1->map_flags &&
> -		meta0->max_entries == meta1->max_entries;
> +		(meta0->max_entries == meta1->max_entries ||
> +		 !(meta0->properties & BPF_MAP_NO_DYNAMIC_INNER_MAP_SIZE));

Same thought here on making it an explicit opt-in instead. So no 'by default a
dynamic inner map size is safe and enabled' but instead the other way round and
allow the cases we know that are working and care about (e.g. htab, lru, etc)
where we can safely follow-up later with more on a case-by-case basis.

>   }
>   
>   void *bpf_map_fd_get_ptr(struct bpf_map *map,
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-22 22:22   ` Daniel Borkmann
@ 2020-05-23  1:00     ` Martin KaFai Lau
  2020-05-26 17:54       ` Andrii Nakryiko
  0 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-23  1:00 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: bpf, Alexei Starovoitov, kernel-team, netdev, Andrey Ignatov

On Sat, May 23, 2020 at 12:22:48AM +0200, Daniel Borkmann wrote:
> On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
> [...]
> >   };
> > +/* Cannot be used as an inner map */
> > +#define BPF_MAP_NO_INNER_MAP (1 << 0)
> > +
> >   struct bpf_map {
> >   	/* The first two cachelines with read-mostly members of which some
> >   	 * are also accessed in fast-path (e.g. ops, max_entries).
> > @@ -120,6 +123,7 @@ struct bpf_map {
> >   	struct bpf_map_memory memory;
> >   	char name[BPF_OBJ_NAME_LEN];
> >   	u32 btf_vmlinux_value_type_id;
> > +	u32 properties;
> >   	bool bypass_spec_v1;
> >   	bool frozen; /* write-once; write-protected by freeze_mutex */
> >   	/* 22 bytes hole */
> > @@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops;
> >   #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
> >   	extern const struct bpf_prog_ops _name ## _prog_ops; \
> >   	extern const struct bpf_verifier_ops _name ## _verifier_ops;
> > -#define BPF_MAP_TYPE(_id, _ops) \
> > +#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
> >   	extern const struct bpf_map_ops _ops;
> >   #define BPF_LINK_TYPE(_id, _name)
> >   #include <linux/bpf_types.h>
> >   #undef BPF_PROG_TYPE
> > -#undef BPF_MAP_TYPE
> > +#undef BPF_MAP_TYPE_FL
> >   #undef BPF_LINK_TYPE
> >   extern const struct bpf_prog_ops bpf_offload_prog_ops;
> > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> > index 29d22752fc87..3f32702c9bf4 100644
> > --- a/include/linux/bpf_types.h
> > +++ b/include/linux/bpf_types.h
> > @@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
> >   #endif /* CONFIG_BPF_LSM */
> >   #endif
> > +#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
> > +
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> > -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
> > +/* prog_array->aux->{type,jited} is a runtime binding.
> > + * Doing static check alone in the verifier is not enough,
> > + * so BPF_MAP_NO_INNTER_MAP is needed.
> 
> typo: INNTER
Good catch.

> 
> > + */
> > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops,
> > +		BPF_MAP_NO_INNER_MAP)
> 
> Probably nit, but what is "FL"? flags? We do have map_flags already, but here the
> BPF_MAP_NO_INNER_MAP ends up in 'properties' instead. To avoid confusion, it would
> probably be better to name it 'map_flags_fixed' since this is what it really means;
> fixed flags that cannot be changed/controlled when creating a map.
ok. may be BPF_MAP_TYPE_FIXED_FL?

> 
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
> >   #ifdef CONFIG_CGROUPS
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
> >   #endif
> >   #ifdef CONFIG_CGROUP_BPF
> > -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
> > -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
> > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops,
> > +		BPF_MAP_NO_INNER_MAP)
> > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops,
> > +		BPF_MAP_NO_INNER_MAP)
> >   #endif
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
> > @@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> >   BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> >   #if defined(CONFIG_BPF_JIT)
> > -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
> > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops,
> > +		BPF_MAP_NO_INNER_MAP)
> >   #endif
> > +#undef BPF_MAP_TYPE
> >   BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
> >   BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
> [...]
> > diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
> > index 17738c93bec8..d965a1d328a9 100644
> > --- a/kernel/bpf/map_in_map.c
> > +++ b/kernel/bpf/map_in_map.c
> > @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
> >   	if (IS_ERR(inner_map))
> >   		return inner_map;
> > -	/* prog_array->aux->{type,jited} is a runtime binding.
> > -	 * Doing static check alone in the verifier is not enough.
> > -	 */
> > -	if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
> > -	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
> > -	    inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
> > -	    inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
> > +	if (inner_map->properties & BPF_MAP_NO_INNER_MAP) {
> >   		fdput(f);
> >   		return ERR_PTR(-ENOTSUPP);
> >   	}
> 
> This whole check here is currently very fragile. For example, given we forbid cgroup
> local storage here, why do we not forbid socket local storage? What about other maps
> like stackmap? It's quite unclear if it even works as expected and if there's also a
> use-case we are aware of. Why not making this an explicit opt-in?
Re: "cgroup-local-storage", my understanding is,
cgroup-local-storage is local to the bpf's cgroup that it is running under,
so it is not ok for a cgroup's bpf to be able to access other cgroup's local
storage through map-in-map, so they are excluded here.

sk-local-storage does not have this restriction.  For other maps, if there is
no known safety issue, why restricting it and create unnecessary API
discrepancy?

I think we cannot restrict the existing map either unless there is a
known safety issue.

> 
> Like explicit annotating via struct bpf_map_ops where everything is visible in one
> single place where the map is defined:
> 
> const struct bpf_map_ops array_map_ops = {
>         .map_alloc_check = array_map_alloc_check,
>         [...]
>         .map_flags_fixed = BPF_MAP_IN_MAP_OK,
> };
I am not sure about adding it to bpf_map_ops instead of bpf_types.h.
It will be easier to figure out what map types do not support MAP_IN_MAP (and
other future map's fixed properties) in one place "bpf_types.h" instead of
having to dig into each map src file.

If the objective is to have the future map "consciously" opt-in, how about
keeping the "BPF_MAP_TYPE" name as is but add a fixed_flags param as the
earlier v1 and flip it from NO to OK flag.  It will be clear that,
it is a decision that the new map needs to make instead of a quiet 0
in "struct bpf_map_ops".

For example,
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, BPF_MAP_IN_MAP_OK)
BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, 0)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, BPF_MAP_IN_MAP_OK | BPF_MAP_IN_MAP_DYNAMIC_SIZE_OK)

> 
> That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since
> it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed
> to the other way round where one can easily miss the opt-out case and potentially crash
> the machine worst case.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-23  1:00     ` Martin KaFai Lau
@ 2020-05-26 17:54       ` Andrii Nakryiko
  2020-05-29  6:30         ` Martin KaFai Lau
  0 siblings, 1 reply; 10+ messages in thread
From: Andrii Nakryiko @ 2020-05-26 17:54 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Daniel Borkmann, bpf, Alexei Starovoitov, Kernel Team,
	Networking, Andrey Ignatov

On Fri, May 22, 2020 at 6:01 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Sat, May 23, 2020 at 12:22:48AM +0200, Daniel Borkmann wrote:
> > On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
> > [...]
> > >   };
> > > +/* Cannot be used as an inner map */
> > > +#define BPF_MAP_NO_INNER_MAP (1 << 0)
> > > +
> > >   struct bpf_map {
> > >     /* The first two cachelines with read-mostly members of which some
> > >      * are also accessed in fast-path (e.g. ops, max_entries).
> > > @@ -120,6 +123,7 @@ struct bpf_map {
> > >     struct bpf_map_memory memory;
> > >     char name[BPF_OBJ_NAME_LEN];
> > >     u32 btf_vmlinux_value_type_id;
> > > +   u32 properties;
> > >     bool bypass_spec_v1;
> > >     bool frozen; /* write-once; write-protected by freeze_mutex */
> > >     /* 22 bytes hole */
> > > @@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops;
> > >   #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
> > >     extern const struct bpf_prog_ops _name ## _prog_ops; \
> > >     extern const struct bpf_verifier_ops _name ## _verifier_ops;
> > > -#define BPF_MAP_TYPE(_id, _ops) \
> > > +#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
> > >     extern const struct bpf_map_ops _ops;
> > >   #define BPF_LINK_TYPE(_id, _name)
> > >   #include <linux/bpf_types.h>
> > >   #undef BPF_PROG_TYPE
> > > -#undef BPF_MAP_TYPE
> > > +#undef BPF_MAP_TYPE_FL
> > >   #undef BPF_LINK_TYPE
> > >   extern const struct bpf_prog_ops bpf_offload_prog_ops;
> > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> > > index 29d22752fc87..3f32702c9bf4 100644
> > > --- a/include/linux/bpf_types.h
> > > +++ b/include/linux/bpf_types.h
> > > @@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
> > >   #endif /* CONFIG_BPF_LSM */
> > >   #endif
> > > +#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
> > > +
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
> > > +/* prog_array->aux->{type,jited} is a runtime binding.
> > > + * Doing static check alone in the verifier is not enough,
> > > + * so BPF_MAP_NO_INNTER_MAP is needed.
> >
> > typo: INNTER
> Good catch.
>
> >
> > > + */
> > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops,
> > > +           BPF_MAP_NO_INNER_MAP)
> >
> > Probably nit, but what is "FL"? flags? We do have map_flags already, but here the
> > BPF_MAP_NO_INNER_MAP ends up in 'properties' instead. To avoid confusion, it would
> > probably be better to name it 'map_flags_fixed' since this is what it really means;
> > fixed flags that cannot be changed/controlled when creating a map.
> ok. may be BPF_MAP_TYPE_FIXED_FL?
>
> >
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
> > >   #ifdef CONFIG_CGROUPS
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
> > >   #endif
> > >   #ifdef CONFIG_CGROUP_BPF
> > > -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
> > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
> > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops,
> > > +           BPF_MAP_NO_INNER_MAP)
> > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops,
> > > +           BPF_MAP_NO_INNER_MAP)
> > >   #endif
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
> > > @@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> > >   BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> > >   #if defined(CONFIG_BPF_JIT)
> > > -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
> > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops,
> > > +           BPF_MAP_NO_INNER_MAP)
> > >   #endif
> > > +#undef BPF_MAP_TYPE
> > >   BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
> > >   BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
> > [...]
> > > diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
> > > index 17738c93bec8..d965a1d328a9 100644
> > > --- a/kernel/bpf/map_in_map.c
> > > +++ b/kernel/bpf/map_in_map.c
> > > @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
> > >     if (IS_ERR(inner_map))
> > >             return inner_map;
> > > -   /* prog_array->aux->{type,jited} is a runtime binding.
> > > -    * Doing static check alone in the verifier is not enough.
> > > -    */
> > > -   if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
> > > -       inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
> > > -       inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
> > > -       inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
> > > +   if (inner_map->properties & BPF_MAP_NO_INNER_MAP) {
> > >             fdput(f);
> > >             return ERR_PTR(-ENOTSUPP);
> > >     }
> >
> > This whole check here is currently very fragile. For example, given we forbid cgroup
> > local storage here, why do we not forbid socket local storage? What about other maps
> > like stackmap? It's quite unclear if it even works as expected and if there's also a
> > use-case we are aware of. Why not making this an explicit opt-in?
> Re: "cgroup-local-storage", my understanding is,
> cgroup-local-storage is local to the bpf's cgroup that it is running under,
> so it is not ok for a cgroup's bpf to be able to access other cgroup's local
> storage through map-in-map, so they are excluded here.
>
> sk-local-storage does not have this restriction.  For other maps, if there is
> no known safety issue, why restricting it and create unnecessary API
> discrepancy?
>
> I think we cannot restrict the existing map either unless there is a
> known safety issue.
>
> >
> > Like explicit annotating via struct bpf_map_ops where everything is visible in one
> > single place where the map is defined:
> >
> > const struct bpf_map_ops array_map_ops = {
> >         .map_alloc_check = array_map_alloc_check,
> >         [...]
> >         .map_flags_fixed = BPF_MAP_IN_MAP_OK,
> > };
> I am not sure about adding it to bpf_map_ops instead of bpf_types.h.
> It will be easier to figure out what map types do not support MAP_IN_MAP (and
> other future map's fixed properties) in one place "bpf_types.h" instead of
> having to dig into each map src file.

I'm 100% with Daniel here. If we are consolidating such things, I'd
rather have them in one place where differences between maps are
defined, which is ops. Despite an "ops" name, this seems like a
perfect place for specifying all those per-map-type properties and
behaviors. Adding flags into bpf_types.h just splits everything into
two places: bpf_types.h specifies some differences, while ops specify
all the other ones.

Figuring out map-in-map support is just one of many questions one
might ask about differences between map types, I don't think that
justifies adding them to bpf_types.h. Grepping for struct bpf_map_ops
with search context (i.e., -A15 or something like that) should be
enough to get a quick glance at all possible maps and what they
define/override.

It also feels like adding this as bool field for each aspect instead
of a collection of bits is cleaner and a bit more scalable. If we need
to add another property with some parameter/constant, or just enum,
defining one of few possible behaviors, it would be easier to just add
another field, instead of trying to cram that into u32. It also solves
your problem of "at the glance" view of map-in-map support features.
Just name that field unique enough to grep by it :)

>
> If the objective is to have the future map "consciously" opt-in, how about
> keeping the "BPF_MAP_TYPE" name as is but add a fixed_flags param as the
> earlier v1 and flip it from NO to OK flag.  It will be clear that,
> it is a decision that the new map needs to make instead of a quiet 0
> in "struct bpf_map_ops".
>
> For example,
> BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, BPF_MAP_IN_MAP_OK)
> BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, 0)
> BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, BPF_MAP_IN_MAP_OK | BPF_MAP_IN_MAP_DYNAMIC_SIZE_OK)
>
> >
> > That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since
> > it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed
> > to the other way round where one can easily miss the opt-out case and potentially crash
> > the machine worst case.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-26 17:54       ` Andrii Nakryiko
@ 2020-05-29  6:30         ` Martin KaFai Lau
  2020-05-29 20:40           ` Andrii Nakryiko
  0 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2020-05-29  6:30 UTC (permalink / raw)
  To: Andrii Nakryiko, Daniel Borkmann
  Cc: bpf, Alexei Starovoitov, Kernel Team, Networking, Andrey Ignatov

On Tue, May 26, 2020 at 10:54:26AM -0700, Andrii Nakryiko wrote:
> On Fri, May 22, 2020 at 6:01 PM Martin KaFai Lau <kafai@fb.com> wrote:
> >
> > On Sat, May 23, 2020 at 12:22:48AM +0200, Daniel Borkmann wrote:
> > > On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
> > > [...]
> > > >   };
> > > > +/* Cannot be used as an inner map */
> > > > +#define BPF_MAP_NO_INNER_MAP (1 << 0)
> > > > +
> > > >   struct bpf_map {
> > > >     /* The first two cachelines with read-mostly members of which some
> > > >      * are also accessed in fast-path (e.g. ops, max_entries).
> > > > @@ -120,6 +123,7 @@ struct bpf_map {
> > > >     struct bpf_map_memory memory;
> > > >     char name[BPF_OBJ_NAME_LEN];
> > > >     u32 btf_vmlinux_value_type_id;
> > > > +   u32 properties;
> > > >     bool bypass_spec_v1;
> > > >     bool frozen; /* write-once; write-protected by freeze_mutex */
> > > >     /* 22 bytes hole */
> > > > @@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops;
> > > >   #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
> > > >     extern const struct bpf_prog_ops _name ## _prog_ops; \
> > > >     extern const struct bpf_verifier_ops _name ## _verifier_ops;
> > > > -#define BPF_MAP_TYPE(_id, _ops) \
> > > > +#define BPF_MAP_TYPE_FL(_id, _ops, properties) \
> > > >     extern const struct bpf_map_ops _ops;
> > > >   #define BPF_LINK_TYPE(_id, _name)
> > > >   #include <linux/bpf_types.h>
> > > >   #undef BPF_PROG_TYPE
> > > > -#undef BPF_MAP_TYPE
> > > > +#undef BPF_MAP_TYPE_FL
> > > >   #undef BPF_LINK_TYPE
> > > >   extern const struct bpf_prog_ops bpf_offload_prog_ops;
> > > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> > > > index 29d22752fc87..3f32702c9bf4 100644
> > > > --- a/include/linux/bpf_types.h
> > > > +++ b/include/linux/bpf_types.h
> > > > @@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
> > > >   #endif /* CONFIG_BPF_LSM */
> > > >   #endif
> > > > +#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0)
> > > > +
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> > > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops)
> > > > +/* prog_array->aux->{type,jited} is a runtime binding.
> > > > + * Doing static check alone in the verifier is not enough,
> > > > + * so BPF_MAP_NO_INNTER_MAP is needed.
> > >
> > > typo: INNTER
> > Good catch.
> >
> > >
> > > > + */
> > > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops,
> > > > +           BPF_MAP_NO_INNER_MAP)
> > >
> > > Probably nit, but what is "FL"? flags? We do have map_flags already, but here the
> > > BPF_MAP_NO_INNER_MAP ends up in 'properties' instead. To avoid confusion, it would
> > > probably be better to name it 'map_flags_fixed' since this is what it really means;
> > > fixed flags that cannot be changed/controlled when creating a map.
> > ok. may be BPF_MAP_TYPE_FIXED_FL?
> >
> > >
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
> > > >   #ifdef CONFIG_CGROUPS
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
> > > >   #endif
> > > >   #ifdef CONFIG_CGROUP_BPF
> > > > -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
> > > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
> > > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops,
> > > > +           BPF_MAP_NO_INNER_MAP)
> > > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops,
> > > > +           BPF_MAP_NO_INNER_MAP)
> > > >   #endif
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
> > > > @@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> > > >   BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> > > >   #if defined(CONFIG_BPF_JIT)
> > > > -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
> > > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops,
> > > > +           BPF_MAP_NO_INNER_MAP)
> > > >   #endif
> > > > +#undef BPF_MAP_TYPE
> > > >   BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
> > > >   BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
> > > [...]
> > > > diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
> > > > index 17738c93bec8..d965a1d328a9 100644
> > > > --- a/kernel/bpf/map_in_map.c
> > > > +++ b/kernel/bpf/map_in_map.c
> > > > @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
> > > >     if (IS_ERR(inner_map))
> > > >             return inner_map;
> > > > -   /* prog_array->aux->{type,jited} is a runtime binding.
> > > > -    * Doing static check alone in the verifier is not enough.
> > > > -    */
> > > > -   if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
> > > > -       inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
> > > > -       inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE ||
> > > > -       inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
> > > > +   if (inner_map->properties & BPF_MAP_NO_INNER_MAP) {
> > > >             fdput(f);
> > > >             return ERR_PTR(-ENOTSUPP);
> > > >     }
> > >
> > > This whole check here is currently very fragile. For example, given we forbid cgroup
> > > local storage here, why do we not forbid socket local storage? What about other maps
> > > like stackmap? It's quite unclear if it even works as expected and if there's also a
> > > use-case we are aware of. Why not making this an explicit opt-in?
> > Re: "cgroup-local-storage", my understanding is,
> > cgroup-local-storage is local to the bpf's cgroup that it is running under,
> > so it is not ok for a cgroup's bpf to be able to access other cgroup's local
> > storage through map-in-map, so they are excluded here.
> >
> > sk-local-storage does not have this restriction.  For other maps, if there is
> > no known safety issue, why restricting it and create unnecessary API
> > discrepancy?
> >
> > I think we cannot restrict the existing map either unless there is a
> > known safety issue.
> >
> > >
> > > Like explicit annotating via struct bpf_map_ops where everything is visible in one
> > > single place where the map is defined:
> > >
> > > const struct bpf_map_ops array_map_ops = {
> > >         .map_alloc_check = array_map_alloc_check,
> > >         [...]
> > >         .map_flags_fixed = BPF_MAP_IN_MAP_OK,
> > > };
> > I am not sure about adding it to bpf_map_ops instead of bpf_types.h.
> > It will be easier to figure out what map types do not support MAP_IN_MAP (and
> > other future map's fixed properties) in one place "bpf_types.h" instead of
> > having to dig into each map src file.
> 
> I'm 100% with Daniel here. If we are consolidating such things, I'd
> rather have them in one place where differences between maps are
> defined, which is ops. Despite an "ops" name, this seems like a
> perfect place for specifying all those per-map-type properties and
> behaviors. Adding flags into bpf_types.h just splits everything into
> two places: bpf_types.h specifies some differences, while ops specify
> all the other ones.
> 
> Figuring out map-in-map support is just one of many questions one
> might ask about differences between map types, I don't think that
> justifies adding them to bpf_types.h. Grepping for struct bpf_map_ops
> with search context (i.e., -A15 or something like that) should be
> enough to get a quick glance at all possible maps and what they
> define/override.
> 
> It also feels like adding this as bool field for each aspect instead
> of a collection of bits is cleaner and a bit more scalable. If we need
> to add another property with some parameter/constant, or just enum,
> defining one of few possible behaviors, it would be easier to just add
> another field, instead of trying to cram that into u32. It also solves
> your problem of "at the glance" view of map-in-map support features.
> Just name that field unique enough to grep by it :)
How about another way.  What patch 2 want is each map could have its own
bpf_map_meta_equal().  Instead of adding 2 flags, add the bpf_map_meta_equal()
as a ops to bpf_map_ops.  Each map supports to be used as an inner_map
needs to set this ops.  Then it will be an opt-in.
A default implementation can be provided for most maps' use.
The maps (e.g. arraymap and other future maps) that has different requirement
can implement its own.  For the existing maps, when we address those
limitations (e.g. arraymap's gen_lookup) later,  we can then change its
bpf_map_meta_equal.

Thoughts?

> 
> >
> > If the objective is to have the future map "consciously" opt-in, how about
> > keeping the "BPF_MAP_TYPE" name as is but add a fixed_flags param as the
> > earlier v1 and flip it from NO to OK flag.  It will be clear that,
> > it is a decision that the new map needs to make instead of a quiet 0
> > in "struct bpf_map_ops".
> >
> > For example,
> > BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, BPF_MAP_IN_MAP_OK)
> > BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, 0)
> > BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, BPF_MAP_IN_MAP_OK | BPF_MAP_IN_MAP_DYNAMIC_SIZE_OK)
> >
> > >
> > > That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since
> > > it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed
> > > to the other way round where one can easily miss the opt-out case and potentially crash
> > > the machine worst case.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h
  2020-05-29  6:30         ` Martin KaFai Lau
@ 2020-05-29 20:40           ` Andrii Nakryiko
  0 siblings, 0 replies; 10+ messages in thread
From: Andrii Nakryiko @ 2020-05-29 20:40 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Daniel Borkmann, bpf, Alexei Starovoitov, Kernel Team,
	Networking, Andrey Ignatov

On Thu, May 28, 2020 at 11:31 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Tue, May 26, 2020 at 10:54:26AM -0700, Andrii Nakryiko wrote:
> > On Fri, May 22, 2020 at 6:01 PM Martin KaFai Lau <kafai@fb.com> wrote:
> > >
> > > On Sat, May 23, 2020 at 12:22:48AM +0200, Daniel Borkmann wrote:
> > > > On 5/22/20 4:23 AM, Martin KaFai Lau wrote:
> > > > [...]
> > > > >   };

[...]


> > >
> > > >
> > > > Like explicit annotating via struct bpf_map_ops where everything is visible in one
> > > > single place where the map is defined:
> > > >
> > > > const struct bpf_map_ops array_map_ops = {
> > > >         .map_alloc_check = array_map_alloc_check,
> > > >         [...]
> > > >         .map_flags_fixed = BPF_MAP_IN_MAP_OK,
> > > > };
> > > I am not sure about adding it to bpf_map_ops instead of bpf_types.h.
> > > It will be easier to figure out what map types do not support MAP_IN_MAP (and
> > > other future map's fixed properties) in one place "bpf_types.h" instead of
> > > having to dig into each map src file.
> >
> > I'm 100% with Daniel here. If we are consolidating such things, I'd
> > rather have them in one place where differences between maps are
> > defined, which is ops. Despite an "ops" name, this seems like a
> > perfect place for specifying all those per-map-type properties and
> > behaviors. Adding flags into bpf_types.h just splits everything into
> > two places: bpf_types.h specifies some differences, while ops specify
> > all the other ones.
> >
> > Figuring out map-in-map support is just one of many questions one
> > might ask about differences between map types, I don't think that
> > justifies adding them to bpf_types.h. Grepping for struct bpf_map_ops
> > with search context (i.e., -A15 or something like that) should be
> > enough to get a quick glance at all possible maps and what they
> > define/override.
> >
> > It also feels like adding this as bool field for each aspect instead
> > of a collection of bits is cleaner and a bit more scalable. If we need
> > to add another property with some parameter/constant, or just enum,
> > defining one of few possible behaviors, it would be easier to just add
> > another field, instead of trying to cram that into u32. It also solves
> > your problem of "at the glance" view of map-in-map support features.
> > Just name that field unique enough to grep by it :)
> How about another way.  What patch 2 want is each map could have its own
> bpf_map_meta_equal().  Instead of adding 2 flags, add the bpf_map_meta_equal()
> as a ops to bpf_map_ops.  Each map supports to be used as an inner_map
> needs to set this ops.  Then it will be an opt-in.
> A default implementation can be provided for most maps' use.
> The maps (e.g. arraymap and other future maps) that has different requirement
> can implement its own.  For the existing maps, when we address those
> limitations (e.g. arraymap's gen_lookup) later,  we can then change its
> bpf_map_meta_equal.
>
> Thoughts?
>

I think that would work as well, I don't mind.

> >
> > >
> > > If the objective is to have the future map "consciously" opt-in, how about
> > > keeping the "BPF_MAP_TYPE" name as is but add a fixed_flags param as the
> > > earlier v1 and flip it from NO to OK flag.  It will be clear that,
> > > it is a decision that the new map needs to make instead of a quiet 0
> > > in "struct bpf_map_ops".
> > >
> > > For example,
> > > BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, BPF_MAP_IN_MAP_OK)
> > > BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, 0)
> > > BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, BPF_MAP_IN_MAP_OK | BPF_MAP_IN_MAP_DYNAMIC_SIZE_OK)
> > >
> > > >
> > > > That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since
> > > > it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed
> > > > to the other way round where one can easily miss the opt-out case and potentially crash
> > > > the machine worst case.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-05-29 20:41 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-22  2:23 [PATCH v2 bpf-next 0/3] bpf: Allow inner map with different max_entries Martin KaFai Lau
2020-05-22  2:23 ` [PATCH v2 bpf-next 1/3] bpf: Consolidate inner-map-compatible properties into bpf_types.h Martin KaFai Lau
2020-05-22 22:22   ` Daniel Borkmann
2020-05-23  1:00     ` Martin KaFai Lau
2020-05-26 17:54       ` Andrii Nakryiko
2020-05-29  6:30         ` Martin KaFai Lau
2020-05-29 20:40           ` Andrii Nakryiko
2020-05-22  2:23 ` [PATCH v2 bpf-next 2/3] bpf: Relax the max_entries check for inner map Martin KaFai Lau
2020-05-22 22:26   ` Daniel Borkmann
2020-05-22  2:23 ` [PATCH v2 bpf-next 3/3] bpf: selftests: Add test for different inner map size Martin KaFai Lau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).