bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
@ 2020-06-03 15:44 Jesper Dangaard Brouer
  2020-06-03 16:22 ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-06-03 15:44 UTC (permalink / raw)
  To: David Ahern, bpf, Alexei Starovoitov
  Cc: Jesper Dangaard Brouer, netdev, Daniel Borkmann, Andrii Nakryiko,
	Lorenzo Bianconi

The recent commit fbee97feed9b ("bpf: Add support to attach bpf program to a
devmap entry"), introduced ability to attach (and run) a separate XDP
bpf_prog for each devmap entry. A bpf_prog is added via a file-descriptor,
thus not using the feature requires using value minus-1. The UAPI is
extended via tail-extending struct bpf_devmap_val and using map->value_size
to determine the feature set.

There is a specific problem with dev_map_can_have_prog() check, which is
called from net/core/dev.c in generic_xdp_install() to refuse usage of
devmap's from generic-XDP that support these bpf_prog's. The check is size
based. This means that all newer features will be blocked from being use by
generic-XDP.

This patch allows userspace to skip handling of 'bpf_prog' on map-inserts.
The feature can be skipped, via not including the member 'bpf_prog' in the
map-value struct, which is propagated/described via BTF.

Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com
---
 kernel/bpf/devmap.c                                |  295 ++++++++++++++++----
 .../bpf/progs/test_xdp_with_devmap_helpers.c       |    4 
 2 files changed, 235 insertions(+), 64 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 854b09beb16b..8a223557123c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -48,6 +48,7 @@
 #include <net/xdp.h>
 #include <linux/filter.h>
 #include <trace/events/xdp.h>
+#include <linux/btf.h>
 
 #define DEV_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -60,10 +61,19 @@ struct xdp_dev_bulk_queue {
 	unsigned int count;
 };
 
-/* DEVMAP values */
+/* DEVMAP map-value layout.
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * BPF-prog side have read-only access to this memory.
+ *
+ * The layout might be different than below, because some struct members are
+ * optional.  This is made dynamic by requiring userspace provides an BTF
+ * description of the struct layout, when creating the BPF-map. Struct names
+ * are important and part of API, as BTF use these names to identify members.
+ */
 struct bpf_devmap_val {
 	u32 ifindex;   /* device index */
-	union {
+	union bpf_prog_union {
 		int fd;  /* prog fd on map write */
 		u32 id;  /* prog id on map read */
 	} bpf_prog;
@@ -76,13 +86,21 @@ struct bpf_dtab_netdev {
 	struct bpf_prog *xdp_prog;
 	struct rcu_head rcu;
 	unsigned int idx;
-	struct bpf_devmap_val val;
+	struct bpf_devmap_val val; /* actual layout defined by userspace BTF */
+};
+
+struct bpf_devmap_val_cfg {
+	struct {
+		int ifindex;
+		int bpf_prog;
+	} btf_offset;
 };
 
 struct bpf_dtab {
 	struct bpf_map map;
 	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
 	struct list_head list;
+	struct bpf_devmap_val_cfg cfg;
 
 	/* these are only used for DEVMAP_HASH type maps */
 	struct hlist_head *dev_index_head;
@@ -116,20 +134,27 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
 
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
-	u32 valsize = attr->value_size;
 	u64 cost = 0;
 	int err;
 
-	/* check sanity of attributes. 2 value sizes supported:
-	 * 4 bytes: ifindex
-	 * 8 bytes: ifindex + prog fd
-	 */
+	/* Value contents validated in dev_map_check_btf */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
-	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
+	    attr->value_size > sizeof(struct bpf_devmap_val) ||
 	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
 		return -EINVAL;
 
+	/* Enforce BTF for userspace, unless dealing with legacy kABI */
+	if (attr->value_size != 4 &&
+	    (!attr->btf_key_type_id || !attr->btf_value_type_id))
+		return -EOPNOTSUPP;
+
+	/* Mark BTF offset's as invalid */
+	dtab->cfg.btf_offset.ifindex  = -1;
+	dtab->cfg.btf_offset.bpf_prog = -1;
+
+	if (attr->value_size == 4 &&  !attr->btf_value_type_id)
+		dtab->cfg.btf_offset.ifindex  = 0;
+
 	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
 	 * verifier prevents writes from the BPF side
 	 */
@@ -199,6 +224,118 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	return &dtab->map;
 }
 
+struct expect {
+	u8 btf_kind;
+	bool mandatory;
+	int bit_offset;
+	int size;
+	const char *name;
+};
+
+/* Note: refactor and move to kernel/bpf/btf.c when other users appear */
+static int btf_find_expect_layout_offset(const struct btf *btf,
+					 const struct btf_type *value_type,
+					 const struct expect *layout)
+{
+	const struct btf_member *member;
+	u32 i, off = -ENOENT;
+
+	for_each_member(i, value_type, member) {
+		const struct btf_type *member_type;
+		const char *member_name;
+
+		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
+		if (BTF_INFO_KIND(member_type->info) != layout->btf_kind) {
+			continue;
+		}
+
+		member_name = btf_name_by_offset(btf, member->name_off);
+		if (!member_name)
+			return -EINVAL;
+
+		if (strcmp(layout->name, member_name))
+			continue;
+
+		if (layout->size > 0 &&  // btf_type_has_size(member_type) &&
+		    member_type->size != layout->size)
+			continue;
+
+		off = btf_member_bit_offset(value_type, member);
+		if (layout->bit_offset > 0 &&
+		    layout->bit_offset != off) {
+			off = -ENOENT;
+			continue;
+		}
+
+		if (off % 8) /* valid C code cannot generate such BTF */
+			return -EINVAL;
+
+		return off;
+	}
+	return off;
+}
+
+/* Expected BTF layout that match struct bpf_devmap_val */
+static const struct expect layout[] = {
+	{BTF_KIND_INT,		true,	-1,	 4,	"ifindex"},
+	{BTF_KIND_UNION,	false,	-1,	 4,	"bpf_prog"}
+};
+
+static int dev_map_check_btf(const struct bpf_map *map,
+			     const struct btf *btf,
+			     const struct btf_type *key_type,
+			     const struct btf_type *value_type)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u32 found_members_cnt = 0;
+	u32 int_data;
+	int off;
+	u32 i;
+
+	/* Validate KEY type and size */
+	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+		return -EOPNOTSUPP;
+
+	int_data = *(u32 *)(key_type + 1);
+	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data) != 0)
+		return -EOPNOTSUPP;
+
+	if (BTF_INFO_KIND(value_type->info) != BTF_KIND_STRUCT)
+		return -EOPNOTSUPP;
+
+	/* Struct/union members in BTF must not exceed (max) expected members */
+	if (btf_type_vlen(value_type) > ARRAY_SIZE(layout))
+			return -E2BIG;
+
+	for (i = 0; i < ARRAY_SIZE(layout); i++) {
+		off = btf_find_expect_layout_offset(btf, value_type, &layout[i]);
+
+		if (off < 0 && layout[i].mandatory)
+			return -EUCLEAN;
+
+		if (off >= 0)
+			found_members_cnt++;
+
+		/* Transfer layout config to map */
+		switch (i) {
+		case 0:
+			dtab->cfg.btf_offset.ifindex = off;
+			break;
+		case 1:
+			dtab->cfg.btf_offset.bpf_prog = off;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Detect if BTF/vlen have members that were not found */
+	if (btf_type_vlen(value_type) > found_members_cnt)
+		return -E2BIG;
+
+	return 0;
+}
+
 static void dev_map_free(struct bpf_map *map)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -338,16 +475,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 	return -ENOENT;
 }
 
-bool dev_map_can_have_prog(struct bpf_map *map)
-{
-	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
-	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
-	    map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
-		return true;
-
-	return false;
-}
-
 static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
@@ -601,42 +728,101 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
 	return ret;
 }
 
+static inline bool map_value_has_bpf_prog(const struct bpf_dtab *dtab)
+{
+	return dtab->cfg.btf_offset.bpf_prog >= 0;
+}
+
+bool dev_map_can_have_prog(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab;
+
+	if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
+	    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH)
+		return false;
+
+	dtab = container_of(map, struct bpf_dtab, map);
+	if (map_value_has_bpf_prog(dtab))
+		return true;
+
+	return false;
+}
+
+static union bpf_prog_union *map_value_ptr_bpf_prog(const struct bpf_dtab *dtab,
+						    void* val)
+{
+	u32 off = dtab->cfg.btf_offset.bpf_prog / 8;
+	union bpf_prog_union *bpf_prog = NULL;
+
+	bpf_prog  = (union bpf_prog_union *)(val + off);
+	return bpf_prog;
+}
+
+static int map_value_ifindex(const struct bpf_dtab *dtab, void* val)
+{
+	int ifindex = 0;
+	u32 off;
+
+	if (dtab->cfg.btf_offset.ifindex < 0)
+		goto out;
+
+	off = dtab->cfg.btf_offset.ifindex / 8;
+	ifindex = *(int *)(val + off);
+out:
+	return ifindex;
+}
+
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
-						    struct bpf_dtab *dtab,
-						    struct bpf_devmap_val *val,
+						    struct bpf_map *map,
+						    void *value,
 						    unsigned int idx)
 {
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_prog *prog = NULL;
 	struct bpf_dtab_netdev *dev;
+	int ifindex;
 
-	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
+	dev = kzalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
 			   dtab->map.numa_node);
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->dev = dev_get_by_index(net, val->ifindex);
+	/* Already verified value_size <= sizeof bpf_devmap_val */
+	memcpy(&dev->val, value, map->value_size);
+
+	/* Member: ifindex is mandatory, both BTF and kABI */
+	ifindex = map_value_ifindex(dtab, &dev->val);
+	if (!ifindex)
+		return ERR_PTR(-EINVAL);
+
+	dev->dev = dev_get_by_index(net, ifindex);
 	if (!dev->dev)
 		goto err_out;
 
-	if (val->bpf_prog.fd >= 0) {
-		prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
-					     BPF_PROG_TYPE_XDP, false);
-		if (IS_ERR(prog))
-			goto err_put_dev;
-		if (prog->expected_attach_type != BPF_XDP_DEVMAP)
-			goto err_put_prog;
-	}
+	/* Member: bpf_prog union is optional */
+	if (map_value_has_bpf_prog(dtab)) {
+		union bpf_prog_union *bpf_prog;
 
+		bpf_prog = map_value_ptr_bpf_prog(dtab, &dev->val);
+
+		if (bpf_prog->fd >= 0) {
+			prog = bpf_prog_get_type_dev(bpf_prog->fd,
+						     BPF_PROG_TYPE_XDP, false);
+			if (IS_ERR(prog))
+				goto err_put_dev;
+			if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+				goto err_put_prog;
+		}
+		if (prog) {
+			dev->xdp_prog = prog;
+			bpf_prog->id = prog->aux->id;
+		} else {
+			dev->xdp_prog = NULL;
+			bpf_prog->id = 0;
+		}
+	}
 	dev->idx = idx;
 	dev->dtab = dtab;
-	if (prog) {
-		dev->xdp_prog = prog;
-		dev->val.bpf_prog.id = prog->aux->id;
-	} else {
-		dev->xdp_prog = NULL;
-		dev->val.bpf_prog.id = 0;
-	}
-	dev->val.ifindex = val->ifindex;
 
 	return dev;
 err_put_prog:
@@ -652,7 +838,6 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 				 void *key, void *value, u64 map_flags)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 i = *(u32 *)key;
 
@@ -663,19 +848,9 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 	if (unlikely(map_flags == BPF_NOEXIST))
 		return -EEXIST;
 
-	/* already verified value_size <= sizeof val */
-	memcpy(&val, value, map->value_size);
-
-	if (!val.ifindex) {
-		dev = NULL;
-		/* can not specify fd if ifindex is 0 */
-		if (val.bpf_prog.fd != -1)
-			return -EINVAL;
-	} else {
-		dev = __dev_map_alloc_node(net, dtab, &val, i);
-		if (IS_ERR(dev))
-			return PTR_ERR(dev);
-	}
+	dev = __dev_map_alloc_node(net, map, value, i);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
 
 	/* Use call_rcu() here to ensure rcu critical sections have completed
 	 * Remembering the driver side flush operation will happen before the
@@ -699,16 +874,12 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 				     void *key, void *value, u64 map_flags)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;
 
-	/* already verified value_size <= sizeof val */
-	memcpy(&val, value, map->value_size);
-
-	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
+	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
 
 	spin_lock_irqsave(&dtab->index_lock, flags);
@@ -717,7 +888,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 	if (old_dev && (map_flags & BPF_NOEXIST))
 		goto out_err;
 
-	dev = __dev_map_alloc_node(net, dtab, &val, idx);
+	dev = __dev_map_alloc_node(net, map, value, idx);
 	if (IS_ERR(dev)) {
 		err = PTR_ERR(dev);
 		goto out_err;
@@ -762,7 +933,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_lookup_elem = dev_map_lookup_elem,
 	.map_update_elem = dev_map_update_elem,
 	.map_delete_elem = dev_map_delete_elem,
-	.map_check_btf = map_check_no_btf,
+	.map_check_btf = dev_map_check_btf,
 };
 
 const struct bpf_map_ops dev_map_hash_ops = {
@@ -772,7 +943,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
 	.map_lookup_elem = dev_map_hash_lookup_elem,
 	.map_update_elem = dev_map_hash_update_elem,
 	.map_delete_elem = dev_map_hash_delete_elem,
-	.map_check_btf = map_check_no_btf,
+	.map_check_btf = dev_map_check_btf,
 };
 
 static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index deef0e050863..d2481ec10764 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -5,8 +5,8 @@
 
 struct {
 	__uint(type, BPF_MAP_TYPE_DEVMAP);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__type(key, u32);
+	__type(value, struct bpf_devmap_val);
 	__uint(max_entries, 4);
 } dm_ports SEC(".maps");
 



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-03 15:44 [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF Jesper Dangaard Brouer
@ 2020-06-03 16:22 ` Alexei Starovoitov
  2020-06-04 15:48   ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2020-06-03 16:22 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David Ahern, bpf, netdev, Daniel Borkmann, Andrii Nakryiko,
	Lorenzo Bianconi

On Wed, Jun 03, 2020 at 05:44:43PM +0200, Jesper Dangaard Brouer wrote:
> The recent commit fbee97feed9b ("bpf: Add support to attach bpf program to a
> devmap entry"), introduced ability to attach (and run) a separate XDP
> bpf_prog for each devmap entry. A bpf_prog is added via a file-descriptor,
> thus not using the feature requires using value minus-1. The UAPI is
> extended via tail-extending struct bpf_devmap_val and using map->value_size
> to determine the feature set.
> 
> There is a specific problem with dev_map_can_have_prog() check, which is
> called from net/core/dev.c in generic_xdp_install() to refuse usage of
> devmap's from generic-XDP that support these bpf_prog's. The check is size
> based. This means that all newer features will be blocked from being use by
> generic-XDP.
> 
> This patch allows userspace to skip handling of 'bpf_prog' on map-inserts.
> The feature can be skipped, via not including the member 'bpf_prog' in the
> map-value struct, which is propagated/described via BTF.
> 
> Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry")
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com

The patch makes no sense to me.
please expose 'struct struct bpf_devmap_val' in uapi/bpf.h
That's what it is whether you want to acknowledge that or not.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-03 16:22 ` Alexei Starovoitov
@ 2020-06-04 15:48   ` Jesper Dangaard Brouer
  2020-06-04 16:40     ` David Ahern
  0 siblings, 1 reply; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-06-04 15:48 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David Ahern, bpf, netdev, Daniel Borkmann, Andrii Nakryiko,
	Lorenzo Bianconi, brouer, David Miller

On Wed, 3 Jun 2020 09:22:57 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Wed, Jun 03, 2020 at 05:44:43PM +0200, Jesper Dangaard Brouer wrote:
> > The recent commit fbee97feed9b ("bpf: Add support to attach bpf program to a
> > devmap entry"), introduced ability to attach (and run) a separate XDP
> > bpf_prog for each devmap entry. A bpf_prog is added via a file-descriptor,
> > thus not using the feature requires using value minus-1. The UAPI is
> > extended via tail-extending struct bpf_devmap_val and using map->value_size
> > to determine the feature set.
> > 
> > There is a specific problem with dev_map_can_have_prog() check, which is
> > called from net/core/dev.c in generic_xdp_install() to refuse usage of
> > devmap's from generic-XDP that support these bpf_prog's. The check is size
> > based. This means that all newer features will be blocked from being use by
> > generic-XDP.
> > 
> > This patch allows userspace to skip handling of 'bpf_prog' on map-inserts.
> > The feature can be skipped, via not including the member 'bpf_prog' in the
> > map-value struct, which is propagated/described via BTF.
> > 
> > Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry")
> > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com  
> 
> The patch makes no sense to me.

Hmm, that is not a very constructive answer, and it doesn't help me
to improve and move forward with the code.  I interpret that you think
my approach is completely wrong, but it would have been nice to
understand why.  I will give up on this approach, also given bpf-next
is closed now.


> please expose 'struct struct bpf_devmap_val' in uapi/bpf.h
> That's what it is whether you want to acknowledge that or not.

I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
before, this caused the issues for my userspace application, that
automatically picked-up struct bpf_devmap_val, and started to fail
(with no code changes), because it needed minus-1 as input.  I fear
that this will cause more work for me later, when I have to helpout and
support end-users on e.g. xdp-newbies list, as it will not be obvious
to end-users why their programs map-insert start to fail.  I have given
up, so I will not NACK anyone sending such a patch.

Why is it we need to support file-descriptor zero as a valid
file-descriptor for a bpf-prog?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-04 15:48   ` Jesper Dangaard Brouer
@ 2020-06-04 16:40     ` David Ahern
  2020-06-04 17:33       ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: David Ahern @ 2020-06-04 16:40 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Alexei Starovoitov
  Cc: bpf, netdev, Daniel Borkmann, Andrii Nakryiko, Lorenzo Bianconi,
	David Miller

On 6/4/20 9:48 AM, Jesper Dangaard Brouer wrote:
> I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
> before, this caused the issues for my userspace application, that
> automatically picked-up struct bpf_devmap_val, and started to fail
> (with no code changes), because it needed minus-1 as input.  I fear
> that this will cause more work for me later, when I have to helpout and
> support end-users on e.g. xdp-newbies list, as it will not be obvious
> to end-users why their programs map-insert start to fail.  I have given
> up, so I will not NACK anyone sending such a patch.
> 
> Why is it we need to support file-descriptor zero as a valid
> file-descriptor for a bpf-prog?

That was a nice property of using the id instead of fd. And the init to
-1 is not unique to this; adopters of the bpf_set_link_xdp_fd_opts for
example have to do the same.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-04 16:40     ` David Ahern
@ 2020-06-04 17:33       ` Alexei Starovoitov
  2020-06-05  8:23         ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2020-06-04 17:33 UTC (permalink / raw)
  To: David Ahern
  Cc: Jesper Dangaard Brouer, bpf, netdev, Daniel Borkmann,
	Andrii Nakryiko, Lorenzo Bianconi, David Miller

On Thu, Jun 04, 2020 at 10:40:06AM -0600, David Ahern wrote:
> On 6/4/20 9:48 AM, Jesper Dangaard Brouer wrote:
> > I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
> > before, this caused the issues for my userspace application, that
> > automatically picked-up struct bpf_devmap_val, and started to fail
> > (with no code changes), because it needed minus-1 as input.  I fear
> > that this will cause more work for me later, when I have to helpout and
> > support end-users on e.g. xdp-newbies list, as it will not be obvious
> > to end-users why their programs map-insert start to fail.  I have given
> > up, so I will not NACK anyone sending such a patch.

Jesper,

you gave wrong direction to David during development of the patches and
now the devmap uapi is suffering the consequences.

> > 
> > Why is it we need to support file-descriptor zero as a valid
> > file-descriptor for a bpf-prog?
> 
> That was a nice property of using the id instead of fd. And the init to
> -1 is not unique to this; adopters of the bpf_set_link_xdp_fd_opts for
> example have to do the same.

I think it's better to adopt "fd==0 -> invalid" approach.
It won't be unique here. We're already using it in other places in bpf syscall.
I agree with Jesper that requiring -1 init of 2nd field is quite ugly
and inconvenient.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-04 17:33       ` Alexei Starovoitov
@ 2020-06-05  8:23         ` Jesper Dangaard Brouer
  2020-06-05 11:01           ` Toke Høiland-Jørgensen
  2020-06-05 16:58           ` Alexei Starovoitov
  0 siblings, 2 replies; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-06-05  8:23 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David Ahern, bpf, netdev, Daniel Borkmann, Andrii Nakryiko,
	Lorenzo Bianconi, David Miller, brouer

On Thu, 4 Jun 2020 10:33:41 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Thu, Jun 04, 2020 at 10:40:06AM -0600, David Ahern wrote:
> > On 6/4/20 9:48 AM, Jesper Dangaard Brouer wrote:  
> > > I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
> > > before, this caused the issues for my userspace application, that
> > > automatically picked-up struct bpf_devmap_val, and started to fail
> > > (with no code changes), because it needed minus-1 as input.  I fear
> > > that this will cause more work for me later, when I have to helpout and
> > > support end-users on e.g. xdp-newbies list, as it will not be obvious
> > > to end-users why their programs map-insert start to fail.  I have given
> > > up, so I will not NACK anyone sending such a patch.  
> 
> Jesper,
> 
> you gave wrong direction to David during development of the patches and
> now the devmap uapi is suffering the consequences.
> 
> > > 
> > > Why is it we need to support file-descriptor zero as a valid
> > > file-descriptor for a bpf-prog?  
> > 
> > That was a nice property of using the id instead of fd. And the init to
> > -1 is not unique to this; adopters of the bpf_set_link_xdp_fd_opts for
> > example have to do the same.  
> 
> I think it's better to adopt "fd==0 -> invalid" approach.
> It won't be unique here. We're already using it in other places in bpf syscall.
> I agree with Jesper that requiring -1 init of 2nd field is quite ugly
> and inconvenient.

Great. If we can remove this requirement of -1 init (and let zero mean
feature isn't used), then I'm all for exposing expose in uapi/bpf.h.


For future extensions there is still a problem/challenge in
dev_map_can_have_prog() that blocks generic-XDP for using future
extensions.  BUT next person extending devmap can deal with that, so
it's not something we need to deal with now.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-05  8:23         ` Jesper Dangaard Brouer
@ 2020-06-05 11:01           ` Toke Høiland-Jørgensen
  2020-06-05 17:01             ` Alexei Starovoitov
  2020-06-05 16:58           ` Alexei Starovoitov
  1 sibling, 1 reply; 10+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-06-05 11:01 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Alexei Starovoitov
  Cc: David Ahern, bpf, netdev, Daniel Borkmann, Andrii Nakryiko,
	Lorenzo Bianconi, David Miller, brouer

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Thu, 4 Jun 2020 10:33:41 -0700
> Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>
>> On Thu, Jun 04, 2020 at 10:40:06AM -0600, David Ahern wrote:
>> > On 6/4/20 9:48 AM, Jesper Dangaard Brouer wrote:  
>> > > I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
>> > > before, this caused the issues for my userspace application, that
>> > > automatically picked-up struct bpf_devmap_val, and started to fail
>> > > (with no code changes), because it needed minus-1 as input.  I fear
>> > > that this will cause more work for me later, when I have to helpout and
>> > > support end-users on e.g. xdp-newbies list, as it will not be obvious
>> > > to end-users why their programs map-insert start to fail.  I have given
>> > > up, so I will not NACK anyone sending such a patch.  
>> 
>> Jesper,
>> 
>> you gave wrong direction to David during development of the patches and
>> now the devmap uapi is suffering the consequences.
>> 
>> > > 
>> > > Why is it we need to support file-descriptor zero as a valid
>> > > file-descriptor for a bpf-prog?  
>> > 
>> > That was a nice property of using the id instead of fd. And the init to
>> > -1 is not unique to this; adopters of the bpf_set_link_xdp_fd_opts for
>> > example have to do the same.  
>> 
>> I think it's better to adopt "fd==0 -> invalid" approach.
>> It won't be unique here. We're already using it in other places in bpf syscall.
>> I agree with Jesper that requiring -1 init of 2nd field is quite ugly
>> and inconvenient.
>
> Great. If we can remove this requirement of -1 init (and let zero mean
> feature isn't used), then I'm all for exposing expose in uapi/bpf.h.

If we're going to officially deprecate fd 0 as a valid BPF fd, we should
at least make sure users don't end up with such an fd after opening a
BPF object. Not sure how the fd number assignment works, but could we
make sure that the kernel never returns fd 0 for a BPF program/map?

Alternatively, we could add a check in libbpf and either reject the
call, or just call dup() before passing the fd to the kernel.

Right now it's quite trivial to get a BPF program ref with fd0 - all you
have to do is open a BPF program is the first thing you do after closing
stdin (like a daemon might). I'd really rather not have to help anyone
debug that...

-Toke


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-05  8:23         ` Jesper Dangaard Brouer
  2020-06-05 11:01           ` Toke Høiland-Jørgensen
@ 2020-06-05 16:58           ` Alexei Starovoitov
  2020-06-09 12:19             ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2020-06-05 16:58 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David Ahern, bpf, Network Development, Daniel Borkmann,
	Andrii Nakryiko, Lorenzo Bianconi, David Miller

On Fri, Jun 5, 2020 at 1:23 AM Jesper Dangaard Brouer <brouer@redhat.com> wrote:
>
> Great. If we can remove this requirement of -1 init (and let zero mean
> feature isn't used), then I'm all for exposing expose in uapi/bpf.h.

Not having it in bpf.h doesn't magically make it invisible.
It's uapi because user space C sources rely on its fixed format.
vmlinux.h contains all kernel types. both uapi and kernel internal.
devmap selftest taking uapi 'struct bpf_devmap_val' from vmlinux.h is
an awful hack.
I prefer to keep vmlinux.h usage to bpf programs only.
User space C code should interface with kernel via proper uapi headers.
When vmlinux.h is used by bpf C program it's completely different from
user space C code doing the same, because llvm emits relocations for
bpf prog and libbpf adjusts them.
So doing 'foo->bar' in bpf C is specific to target kernel, whereas
user C code 'foo->bar' is a hard constant which bakes it into uapi
that kernel has to keep backwards compatible.
If in some distant future we teach both gcc and clang to do bpf-style
relocations for x86 and teach ld.so to adjust them then we can dream
about very differently looking kernel/user interfaces.
Right now any struct used by user C code and passed into kernel is uapi.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-05 11:01           ` Toke Høiland-Jørgensen
@ 2020-06-05 17:01             ` Alexei Starovoitov
  0 siblings, 0 replies; 10+ messages in thread
From: Alexei Starovoitov @ 2020-06-05 17:01 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Jesper Dangaard Brouer, David Ahern, bpf, Network Development,
	Daniel Borkmann, Andrii Nakryiko, Lorenzo Bianconi, David Miller

On Fri, Jun 5, 2020 at 4:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> Jesper Dangaard Brouer <brouer@redhat.com> writes:
>
> > On Thu, 4 Jun 2020 10:33:41 -0700
> > Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >
> >> On Thu, Jun 04, 2020 at 10:40:06AM -0600, David Ahern wrote:
> >> > On 6/4/20 9:48 AM, Jesper Dangaard Brouer wrote:
> >> > > I will NOT send a patch that expose this in uapi/bpf.h.  As I explained
> >> > > before, this caused the issues for my userspace application, that
> >> > > automatically picked-up struct bpf_devmap_val, and started to fail
> >> > > (with no code changes), because it needed minus-1 as input.  I fear
> >> > > that this will cause more work for me later, when I have to helpout and
> >> > > support end-users on e.g. xdp-newbies list, as it will not be obvious
> >> > > to end-users why their programs map-insert start to fail.  I have given
> >> > > up, so I will not NACK anyone sending such a patch.
> >>
> >> Jesper,
> >>
> >> you gave wrong direction to David during development of the patches and
> >> now the devmap uapi is suffering the consequences.
> >>
> >> > >
> >> > > Why is it we need to support file-descriptor zero as a valid
> >> > > file-descriptor for a bpf-prog?
> >> >
> >> > That was a nice property of using the id instead of fd. And the init to
> >> > -1 is not unique to this; adopters of the bpf_set_link_xdp_fd_opts for
> >> > example have to do the same.
> >>
> >> I think it's better to adopt "fd==0 -> invalid" approach.
> >> It won't be unique here. We're already using it in other places in bpf syscall.
> >> I agree with Jesper that requiring -1 init of 2nd field is quite ugly
> >> and inconvenient.
> >
> > Great. If we can remove this requirement of -1 init (and let zero mean
> > feature isn't used), then I'm all for exposing expose in uapi/bpf.h.
>
> If we're going to officially deprecate fd 0 as a valid BPF fd, we should
> at least make sure users don't end up with such an fd after opening a
> BPF object. Not sure how the fd number assignment works, but could we
> make sure that the kernel never returns fd 0 for a BPF program/map?
>
> Alternatively, we could add a check in libbpf and either reject the
> call, or just call dup() before passing the fd to the kernel.

Tweaking libbpf to do dup() was on our todo list for some time.
I think it would be good to do it both in the kernel and in the libbpf.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF
  2020-06-05 16:58           ` Alexei Starovoitov
@ 2020-06-09 12:19             ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-06-09 12:19 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David Ahern, bpf, Network Development, Daniel Borkmann,
	Andrii Nakryiko, Lorenzo Bianconi, David Miller, brouer

On Fri, 5 Jun 2020 09:58:26 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Fri, Jun 5, 2020 at 1:23 AM Jesper Dangaard Brouer <brouer@redhat.com> wrote:
> >
> > Great. If we can remove this requirement of -1 init (and let zero mean
> > feature isn't used), then I'm all for exposing expose in uapi/bpf.h.  
> 
> Not having it in bpf.h doesn't magically make it invisible.
> It's uapi because user space C sources rely on its fixed format.
> vmlinux.h contains all kernel types. both uapi and kernel internal.
> devmap selftest taking uapi 'struct bpf_devmap_val' from vmlinux.h is
> an awful hack.
> I prefer to keep vmlinux.h usage to bpf programs only.
> User space C code should interface with kernel via proper uapi headers.
> When vmlinux.h is used by bpf C program it's completely different from
> user space C code doing the same, because llvm emits relocations for
> bpf prog and libbpf adjusts them.
> So doing 'foo->bar' in bpf C is specific to target kernel, whereas
> user C code 'foo->bar' is a hard constant which bakes it into uapi
> that kernel has to keep backwards compatible.

Thank you for taking time to explain this.
Much appreciated, I agree with everything above.


> If in some distant future we teach both gcc and clang to do bpf-style
> relocations for x86 and teach ld.so to adjust them then we can dream
> about very differently looking kernel/user interfaces.
> Right now any struct used by user C code and passed into kernel is uapi.

I like this future vision.

I guess this patch is premature, as it operates in the same problem
space. It tried to address uapi flexbility, by letting userspace define
the uapi layout via BTF at map_create() time, and let kernel-side
validate BTF-info and restrict possible struct member names, which are
remapped to offsets inside the kernel.

I'll instead wait for the future...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-06-09 12:19 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-03 15:44 [PATCH bpf-next V1] bpf: devmap dynamic map-value area based on BTF Jesper Dangaard Brouer
2020-06-03 16:22 ` Alexei Starovoitov
2020-06-04 15:48   ` Jesper Dangaard Brouer
2020-06-04 16:40     ` David Ahern
2020-06-04 17:33       ` Alexei Starovoitov
2020-06-05  8:23         ` Jesper Dangaard Brouer
2020-06-05 11:01           ` Toke Høiland-Jørgensen
2020-06-05 17:01             ` Alexei Starovoitov
2020-06-05 16:58           ` Alexei Starovoitov
2020-06-09 12:19             ` Jesper Dangaard Brouer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).