[PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries
@ 2020-05-27  1:09 David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH David Ahern
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Implementation of Daniel's proposal for allowing DEVMAP entries to be
a device index, program fd pair.

Programs are run after XDP_REDIRECT and have access to both Rx device
and Tx device.

v1
- fixed prog put on invalid program - Toke
- changed write value from id to fd per Toke's comments about capabilities
- add test cases

David Ahern (5):
  bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  bpf: Add support to attach bpf program to a devmap entry
  xdp: Add xdp_txq_info to xdp_buff
  bpftool: Add SEC name for xdp programs attached to device map
  selftest: Add tests for XDP programs in devmap entries

 include/linux/bpf.h                           |   5 +
 include/net/xdp.h                             |   5 +
 include/uapi/linux/bpf.h                      |   3 +
 kernel/bpf/devmap.c                           | 143 +++++++++++++++---
 net/core/dev.c                                |  18 +++
 net/core/filter.c                             |  17 +++
 tools/include/uapi/linux/bpf.h                |   3 +
 tools/lib/bpf/libbpf.c                        |   2 +
 .../bpf/prog_tests/xdp_devmap_attach.c        | 101 +++++++++++++
 .../selftests/bpf/progs/test_xdp_devmap.c     |  19 +++
 .../bpf/progs/test_xdp_with_devmap.c          |  17 +++
 11 files changed, 315 insertions(+), 18 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_devmap.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_devmap.c

-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
@ 2020-05-27  1:09 ` David Ahern
  2020-05-27 10:26   ` Jesper Dangaard Brouer
  2020-05-27  1:09 ` [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry David Ahern
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Allow DEVMAP and DEVMAP_HASH to take 8-byte values holding a
<device index, program fd/id> pair (fd on write, id on read). To do this,
a new struct is needed in bpf_dtab_netdev to hold the values to return
on lookup.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 kernel/bpf/devmap.c | 56 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a51d9fb7a359..95db6d8beebc 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -60,12 +60,22 @@ struct xdp_dev_bulk_queue {
 	unsigned int count;
 };
 
+/* devmap value can be dev index or dev index + prog fd/id */
+struct dev_map_ext_val {
+	u32 ifindex;	/* must be first for compat with 4-byte values */
+	union {
+		int prog_fd;  /* prog fd on write */
+		u32 prog_id;  /* prog id on read */
+	};
+};
+
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct rcu_head rcu;
 	unsigned int idx;
+	struct dev_map_ext_val val;
 };
 
 struct bpf_dtab {
@@ -108,9 +118,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	u64 cost = 0;
 	int err;
 
-	/* check sanity of attributes */
+	/* check sanity of attributes. 2 value sizes supported:
+	 * 4 bytes: ifindex
+	 * 8 bytes: ifindex + prog fd
+	 */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+	    (attr->value_size != 4 && attr->value_size != 8) ||
+	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
 		return -EINVAL;
 
 	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -472,18 +486,15 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
 
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
 								*(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
-
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void __dev_map_entry_free(struct rcu_head *rcu)
@@ -552,15 +563,18 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 		return ERR_PTR(-ENOMEM);
 
 	dev->dev = dev_get_by_index(net, ifindex);
-	if (!dev->dev) {
-		kfree(dev);
-		return ERR_PTR(-EINVAL);
-	}
+	if (!dev->dev)
+		goto err_out;
 
 	dev->idx = idx;
 	dev->dtab = dtab;
 
+	dev->val.ifindex = ifindex;
+
 	return dev;
+err_out:
+	kfree(dev);
+	return ERR_PTR(-EINVAL);
 }
 
 static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
@@ -568,8 +582,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 i = *(u32 *)key;
+	u32 ifindex;
+
+	if (map->value_size == 4) {
+		ifindex = *(u32 *)value;
+	} else {
+		struct dev_map_ext_val *val = value;
+
+		ifindex = val->ifindex;
+	}
 
 	if (unlikely(map_flags > BPF_EXIST))
 		return -EINVAL;
@@ -609,10 +631,18 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;
+	u32 ifindex;
+
+	if (map->value_size == 4) {
+		ifindex = *(u32 *)value;
+	} else {
+		struct dev_map_ext_val *val = value;
+
+		ifindex = val->ifindex;
+	}
 
 	if (unlikely(map_flags > BPF_EXIST || !ifindex))
 		return -EINVAL;
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry
  2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH David Ahern
@ 2020-05-27  1:09 ` David Ahern
  2020-05-27 10:01   ` Toke Høiland-Jørgensen
  2020-05-27  1:09 ` [PATCH bpf-next 3/5] xdp: Add xdp_txq_info to xdp_buff David Ahern
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Add BPF_XDP_DEVMAP attach type for use with programs associated with a
DEVMAP entry.

DEVMAPs can associate a program with a device entry by setting the
value to <index, fd> pair. The program associated with the fd must have
type XDP with expected attach type BPF_XDP_DEVMAP. When a program is
associated with a device index, the program is run on an XDP_REDIRECT
and before the buffer is added to the per-cpu queue. At this point
rxq data is still valid; the next patch adds tx device information
allowing the program to see both ingress and egress device indices.

XDP generic is skb based and XDP programs do not work with skb's. Block
the use case by walking maps used by a program that is to be attached
via xdpgeneric and fail if any of them are DEVMAP / DEVMAP_HASH with
8-byte values.

Block attach of BPF_XDP_DEVMAP programs to devices.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 include/linux/bpf.h            |  5 ++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/devmap.c            | 92 ++++++++++++++++++++++++++++++----
 net/core/dev.c                 | 18 +++++++
 tools/include/uapi/linux/bpf.h |  1 +
 5 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index efe8836b5c48..088751bc09aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1242,6 +1242,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
+bool dev_map_can_have_prog(struct bpf_map *map);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
@@ -1355,6 +1356,10 @@ static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map
 {
 	return NULL;
 }
+static inline bool dev_map_can_have_prog(struct bpf_map *map)
+{
+	return false;
+}
 
 static inline void __dev_flush(void)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97e1fd19ff58..8c2c0d0c9a0e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -224,6 +224,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_XDP_DEVMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 95db6d8beebc..7658b3e2e7fc 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -73,6 +73,7 @@ struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
+	struct bpf_prog *xdp_prog;
 	struct rcu_head rcu;
 	unsigned int idx;
 	struct dev_map_ext_val val;
@@ -231,6 +232,8 @@ static void dev_map_free(struct bpf_map *map)
 
 			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
 				hlist_del_rcu(&dev->index_hlist);
+				if (dev->xdp_prog)
+					bpf_prog_put(dev->xdp_prog);
 				dev_put(dev->dev);
 				kfree(dev);
 			}
@@ -245,6 +248,8 @@ static void dev_map_free(struct bpf_map *map)
 			if (!dev)
 				continue;
 
+			if (dev->xdp_prog)
+				bpf_prog_put(dev->xdp_prog);
 			dev_put(dev->dev);
 			kfree(dev);
 		}
@@ -331,6 +336,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 	return -ENOENT;
 }
 
+bool dev_map_can_have_prog(struct bpf_map *map)
+{
+	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
+	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
+	    map->value_size != 4)
+		return true;
+
+	return false;
+}
+
 static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
@@ -455,6 +470,35 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	return bq_enqueue(dev, xdpf, dev_rx);
 }
 
+static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
+					 struct xdp_buff *xdp,
+					 struct bpf_prog *xdp_prog)
+{
+	u32 act;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_DROP:
+		fallthrough;
+	case XDP_PASS:
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(dev, xdp_prog, act);
+		act = XDP_DROP;
+		break;
+	}
+
+	if (act == XDP_DROP) {
+		xdp_return_buff(xdp);
+		xdp = NULL;
+	}
+
+	return xdp;
+}
+
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
@@ -466,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 
+	if (dst->xdp_prog) {
+		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
+		if (!xdp)
+			return 0;
+	}
 	return __xdp_enqueue(dev, xdp, dev_rx);
 }
 
@@ -502,6 +551,8 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 	struct bpf_dtab_netdev *dev;
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	if (dev->xdp_prog)
+		bpf_prog_put(dev->xdp_prog);
 	dev_put(dev->dev);
 	kfree(dev);
 }
@@ -552,9 +603,10 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
 
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_dtab *dtab,
-						    u32 ifindex,
+						    u32 ifindex, int fd,
 						    unsigned int idx)
 {
+	struct bpf_prog *prog = NULL;
 	struct bpf_dtab_netdev *dev;
 
 	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
@@ -566,12 +618,30 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 	if (!dev->dev)
 		goto err_out;
 
+	if (fd >= 0) {
+		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, false);
+		if (IS_ERR(prog))
+			goto err_put_dev;
+		if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+			goto err_put_prog;
+	}
+
 	dev->idx = idx;
 	dev->dtab = dtab;
-
+	if (prog) {
+		dev->xdp_prog = prog;
+		dev->val.prog_id = prog->aux->id;
+	} else {
+		dev->xdp_prog = NULL;
+		dev->val.prog_id = 0;
+	}
 	dev->val.ifindex = ifindex;
 
 	return dev;
+err_put_prog:
+	bpf_prog_put(prog);
+err_put_dev:
+	dev_put(dev->dev);
 err_out:
 	kfree(dev);
 	return ERR_PTR(-EINVAL);
@@ -582,8 +652,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 i = *(u32 *)key;
-	u32 ifindex;
+	u32 i = *(u32 *)key, ifindex;
+	int fd = -1;
 
 	if (map->value_size == 4) {
 		ifindex = *(u32 *)value;
@@ -591,6 +661,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 		struct dev_map_ext_val *val = value;
 
 		ifindex = val->ifindex;
+		fd = val->prog_fd;
 	}
 
 	if (unlikely(map_flags > BPF_EXIST))
@@ -602,8 +673,11 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 
 	if (!ifindex) {
 		dev = NULL;
+		/* can not specify fd if ifindex is 0 */
+		if (fd != -1)
+			return -EINVAL;
 	} else {
-		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		dev = __dev_map_alloc_node(net, dtab, ifindex, fd, i);
 		if (IS_ERR(dev))
 			return PTR_ERR(dev);
 	}
@@ -631,10 +705,9 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 idx = *(u32 *)key;
+	u32 idx = *(u32 *)key, ifindex;
+	int err = -EEXIST, fd = -1;
 	unsigned long flags;
-	int err = -EEXIST;
-	u32 ifindex;
 
 	if (map->value_size == 4) {
 		ifindex = *(u32 *)value;
@@ -642,6 +715,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 		struct dev_map_ext_val *val = value;
 
 		ifindex = val->ifindex;
+		fd = val->prog_fd;
 	}
 
 	if (unlikely(map_flags > BPF_EXIST || !ifindex))
@@ -653,7 +727,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 	if (old_dev && (map_flags & BPF_NOEXIST))
 		goto out_err;
 
-	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	dev = __dev_map_alloc_node(net, dtab, ifindex, fd, idx);
 	if (IS_ERR(dev)) {
 		err = PTR_ERR(dev);
 		goto out_err;
diff --git a/net/core/dev.c b/net/core/dev.c
index ae37586f6ee8..10684833f864 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	struct bpf_prog *new = xdp->prog;
 	int ret = 0;
 
+	if (new) {
+		u32 i;
+
+		/* generic XDP does not work with DEVMAPs that can
+		 * have a bpf_prog installed on an entry
+		 */
+		for (i = 0; i < new->aux->used_map_cnt; i++) {
+			if (dev_map_can_have_prog(new->aux->used_maps[i]))
+				return -EINVAL;
+		}
+	}
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return -EINVAL;
 		}
 
+		if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+			NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
+			bpf_prog_put(prog);
+			return -EINVAL;
+		}
+
 		/* prog->aux->id may be 0 for orphaned device-bound progs */
 		if (prog->aux->id && prog->aux->id == prog_id) {
 			bpf_prog_put(prog);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97e1fd19ff58..8c2c0d0c9a0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -224,6 +224,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_XDP_DEVMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH bpf-next 3/5] xdp: Add xdp_txq_info to xdp_buff
  2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry David Ahern
@ 2020-05-27  1:09 ` David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map David Ahern
  2020-05-27  1:09 ` [PATCH bpf-next 5/5] selftest: Add tests for XDP programs in devmap entries David Ahern
  4 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Add xdp_txq_info as the Tx counterpart to xdp_rxq_info. At the
moment only the device is added. Other fields (queue_index)
can be added as use cases arise.

From a UAPI perspective, add egress_ifindex to xdp context for
bpf programs to see the Tx device.

Update the verifier to only allow accesses to egress_ifindex by
XDP programs with BPF_XDP_DEVMAP expected attach type.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 include/net/xdp.h              |  5 +++++
 include/uapi/linux/bpf.h       |  2 ++
 kernel/bpf/devmap.c            |  3 +++
 net/core/filter.c              | 17 +++++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 5 files changed, 29 insertions(+)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 90f11760bd12..d54022959491 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -61,12 +61,17 @@ struct xdp_rxq_info {
 	struct xdp_mem_info mem;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
+struct xdp_txq_info {
+	struct net_device *dev;
+};
+
 struct xdp_buff {
 	void *data;
 	void *data_end;
 	void *data_meta;
 	void *data_hard_start;
 	struct xdp_rxq_info *rxq;
+	struct xdp_txq_info *txq;
 	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8c2c0d0c9a0e..264de1484a66 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3624,6 +3624,8 @@ struct xdp_md {
 	/* Below access go through struct xdp_rxq_info */
 	__u32 ingress_ifindex; /* rxq->dev->ifindex */
 	__u32 rx_queue_index;  /* rxq->queue_index  */
+
+	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
 enum sk_action {
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 7658b3e2e7fc..2fefa7f65d90 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -474,8 +474,11 @@ static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
 					 struct xdp_buff *xdp,
 					 struct bpf_prog *xdp_prog)
 {
+	struct xdp_txq_info txq = { .dev = dev };
 	u32 act;
 
+	xdp->txq = &txq;
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_DROP:
diff --git a/net/core/filter.c b/net/core/filter.c
index bd2853d23b50..199e02a30381 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6994,6 +6994,13 @@ static bool xdp_is_valid_access(int off, int size,
 				const struct bpf_prog *prog,
 				struct bpf_insn_access_aux *info)
 {
+	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
+		switch (off) {
+		case offsetof(struct xdp_md, egress_ifindex):
+			return false;
+		}
+	}
+
 	if (type == BPF_WRITE) {
 		if (bpf_prog_is_dev_bound(prog->aux)) {
 			switch (off) {
@@ -7942,6 +7949,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct xdp_rxq_info,
 					       queue_index));
 		break;
+	case offsetof(struct xdp_md, egress_ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, txq));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_txq_info, dev));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct net_device, ifindex));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8c2c0d0c9a0e..264de1484a66 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3624,6 +3624,8 @@ struct xdp_md {
 	/* Below access go through struct xdp_rxq_info */
 	__u32 ingress_ifindex; /* rxq->dev->ifindex */
 	__u32 rx_queue_index;  /* rxq->queue_index  */
+
+	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
 enum sk_action {
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map
  2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
                   ` (2 preceding siblings ...)
  2020-05-27  1:09 ` [PATCH bpf-next 3/5] xdp: Add xdp_txq_info to xdp_buff David Ahern
@ 2020-05-27  1:09 ` David Ahern
  2020-05-27 10:02   ` Toke Høiland-Jørgensen
  2020-05-27  1:09 ` [PATCH bpf-next 5/5] selftest: Add tests for XDP programs in devmap entries David Ahern
  4 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Support SEC("xdp_dm*") as a shortcut for loading the program with
type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fa04cbe547ed..563827b260e8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6657,6 +6657,8 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_ITER,
 		.is_attach_btf = true,
 		.attach_fn = attach_iter),
+	BPF_EAPROG_SEC("xdp_dm",		BPF_PROG_TYPE_XDP,
+						BPF_XDP_DEVMAP),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH bpf-next 5/5] selftest: Add tests for XDP programs in devmap entries
  2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
                   ` (3 preceding siblings ...)
  2020-05-27  1:09 ` [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map David Ahern
@ 2020-05-27  1:09 ` David Ahern
  4 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2020-05-27  1:09 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, brouer, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

Add tests to verify the ability to add an XDP program to an
entry in a DEVMAP.

Add negative tests to show DEVMAP programs can not be
attached to devices as a normal XDP program, and accesses
to egress_ifindex require BPF_XDP_DEVMAP attach type.

Signed-off-by: David Ahern <dsahern@kernel.org>
---
 .../bpf/prog_tests/xdp_devmap_attach.c        | 101 ++++++++++++++++++
 .../selftests/bpf/progs/test_xdp_devmap.c     |  19 ++++
 .../bpf/progs/test_xdp_with_devmap.c          |  17 +++
 3 files changed, 137 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_devmap.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_devmap.c

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..4f608bde9cbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#define IFINDEX_LO 1
+
+struct devmap_ext_val {
+	u32 ifindex;    /* must be first for compat with 4-byte values */
+	union {
+		int prog_fd;  /* prog fd on write */
+		u32 prog_id;  /* prog id on read */
+	};
+};
+
+void test_xdp_devmap_attach(void)
+{
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_object *obj, *dm_obj = NULL;
+	int err, dm_fd = -1, fd = -1, map_fd;
+	struct bpf_prog_info info = {};
+	struct devmap_ext_val val = {
+		.ifindex = IFINDEX_LO,
+	};
+	__u32 id, len = sizeof(info);
+	__u32 duration = 0, idx = 0;
+
+	attr.file = "./test_xdp_with_devmap.o",
+	err = bpf_prog_load_xattr(&attr, &obj, &fd);
+	if (CHECK(err, "load of xdp program with 8-byte devmap",
+		  "err %d errno %d\n", err, errno))
+		return;
+
+	/* can not attach program with DEVMAPs that allow programs */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+	      "should have failed\n");
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "dm_ports");
+	if (CHECK(map_fd < 0, "Lookup dm_ports map",
+		  "err %d errno %d\n", err, errno))
+		goto out_close;
+
+	/*
+	 * basic test - load program to be installed in devmap;
+	 * also verifies access to ingress and egress ifindices.
+	 */
+	attr.expected_attach_type = BPF_XDP_DEVMAP;
+	attr.file = "./test_xdp_devmap.o";
+	err = bpf_prog_load_xattr(&attr, &dm_obj, &dm_fd);
+	if (CHECK(err, "Load of BPF_XDP_DEVMAP program",
+		  "err %d errno %d\n", err, errno))
+		goto out_close;
+
+	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_close;
+
+	val.prog_fd = dm_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err, "Add program to devmap entry",
+	      "err %d errno %d\n", err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &id, &val);
+	CHECK(err, "Read devmap entry",
+	      "err %d errno %d\n", err, errno);
+	CHECK(info.id != val.prog_id, "Expected program id in devmap entry",
+	      "expected %u read %u\n", info.id, val.prog_id);
+
+	/* can not attach BPF_XDP_DEVMAP program to a device */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+	      "should have failed\n");
+
+	bpf_object__close(dm_obj);
+
+	/* Load of BPF_XDP_DEVMAP as XDP should fail because of egress index */
+	attr.expected_attach_type = 0;
+	err = bpf_prog_load_xattr(&attr, &dm_obj, &dm_fd);
+	if (CHECK(err == 0,
+		  "Load of XDP program accessing egress ifindex without attach type",
+		  "should have failed\n"))
+		bpf_object__close(dm_obj);
+
+	attr.file = "./xdp_dummy.o";
+	err = bpf_prog_load_xattr(&attr, &dm_obj, &dm_fd);
+	if (CHECK_FAIL(err))
+		goto out_close;
+
+	val.ifindex = 1;
+	val.prog_fd = dm_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+	      "should have failed\n");
+
+	bpf_object__close(dm_obj);
+
+out_close:
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap.c
new file mode 100644
index 000000000000..64fc2c3cae01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* program inserted into devmap entry */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_devmap_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	unsigned int len = data_end - data;
+
+	bpf_trace_printk(fmt, sizeof(fmt), ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap.c
new file mode 100644
index 000000000000..cb76c9859e80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_map_def SEC("maps") dm_ports = {
+	.type = BPF_MAP_TYPE_DEVMAP,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32) * 2,
+	.max_entries = 4,
+};
+
+SEC("xdp_devmap")
+int xdp_with_devmap(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&dm_ports, 1, 0);
+}
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry
  2020-05-27  1:09 ` [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry David Ahern
@ 2020-05-27 10:01   ` Toke Høiland-Jørgensen
  2020-05-27 14:02     ` David Ahern
  0 siblings, 1 reply; 20+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-05-27 10:01 UTC (permalink / raw)
  To: David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

David Ahern <dsahern@kernel.org> writes:

> Add BPF_XDP_DEVMAP attach type for use with programs associated with a
> DEVMAP entry.
>
> DEVMAPs can associate a program with a device entry by setting the
> value to <index, fd> pair. The program associated with the fd must have
> type XDP with expected attach type BPF_XDP_DEVMAP. When a program is
> associated with a device index, the program is run on an XDP_REDIRECT
> and before the buffer is added to the per-cpu queue. At this point
> rxq data is still valid; the next patch adds tx device information
> allowing the program to see both ingress and egress device indices.
>
> XDP generic is skb based and XDP programs do not work with skb's. Block
> the use case by walking maps used by a program that is to be attached
> via xdpgeneric and fail if any of them are DEVMAP / DEVMAP_HASH with
> 8-bytes values.
>
> Block attach of BPF_XDP_DEVMAP programs to devices.
>
> Signed-off-by: David Ahern <dsahern@kernel.org>
> ---
>  include/linux/bpf.h            |  5 ++
>  include/uapi/linux/bpf.h       |  1 +
>  kernel/bpf/devmap.c            | 92 ++++++++++++++++++++++++++++++----
>  net/core/dev.c                 | 18 +++++++
>  tools/include/uapi/linux/bpf.h |  1 +
>  5 files changed, 108 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index efe8836b5c48..088751bc09aa 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1242,6 +1242,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>  		    struct net_device *dev_rx);
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
>  			     struct bpf_prog *xdp_prog);
> +bool dev_map_can_have_prog(struct bpf_map *map);
>  
>  struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
>  void __cpu_map_flush(void);
> @@ -1355,6 +1356,10 @@ static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map
>  {
>  	return NULL;
>  }
> +static inline bool dev_map_can_have_prog(struct bpf_map *map)
> +{
> +	return false;
> +}
>  
>  static inline void __dev_flush(void)
>  {
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 97e1fd19ff58..8c2c0d0c9a0e 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -224,6 +224,7 @@ enum bpf_attach_type {
>  	BPF_CGROUP_INET6_GETPEERNAME,
>  	BPF_CGROUP_INET4_GETSOCKNAME,
>  	BPF_CGROUP_INET6_GETSOCKNAME,
> +	BPF_XDP_DEVMAP,
>  	__MAX_BPF_ATTACH_TYPE
>  };
>  
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index 95db6d8beebc..7658b3e2e7fc 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -73,6 +73,7 @@ struct bpf_dtab_netdev {
>  	struct net_device *dev; /* must be first member, due to tracepoint */
>  	struct hlist_node index_hlist;
>  	struct bpf_dtab *dtab;
> +	struct bpf_prog *xdp_prog;
>  	struct rcu_head rcu;
>  	unsigned int idx;
>  	struct dev_map_ext_val val;
> @@ -231,6 +232,8 @@ static void dev_map_free(struct bpf_map *map)
>  
>  			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
>  				hlist_del_rcu(&dev->index_hlist);
> +				if (dev->xdp_prog)
> +					bpf_prog_put(dev->xdp_prog);
>  				dev_put(dev->dev);
>  				kfree(dev);
>  			}
> @@ -245,6 +248,8 @@ static void dev_map_free(struct bpf_map *map)
>  			if (!dev)
>  				continue;
>  
> +			if (dev->xdp_prog)
> +				bpf_prog_put(dev->xdp_prog);
>  			dev_put(dev->dev);
>  			kfree(dev);
>  		}
> @@ -331,6 +336,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
>  	return -ENOENT;
>  }
>  
> +bool dev_map_can_have_prog(struct bpf_map *map)
> +{
> +	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
> +	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
> +	    map->value_size != 4)
> +		return true;
> +
> +	return false;
> +}
> +
>  static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
>  	struct net_device *dev = bq->dev;
> @@ -455,6 +470,35 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  	return bq_enqueue(dev, xdpf, dev_rx);
>  }
>  
> +static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
> +					 struct xdp_buff *xdp,
> +					 struct bpf_prog *xdp_prog)
> +{
> +	u32 act;
> +
> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
> +	switch (act) {
> +	case XDP_DROP:
> +		fallthrough;
> +	case XDP_PASS:
> +		break;
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +		trace_xdp_exception(dev, xdp_prog, act);
> +		act = XDP_DROP;
> +		break;
> +	}
> +
> +	if (act == XDP_DROP) {
> +		xdp_return_buff(xdp);
> +		xdp = NULL;
> +	}
> +
> +	return xdp;
> +}
> +
>  int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  		    struct net_device *dev_rx)
>  {
> @@ -466,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>  {
>  	struct net_device *dev = dst->dev;
>  
> +	if (dst->xdp_prog) {
> +		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
> +		if (!xdp)
> +			return 0;
> +	}
>  	return __xdp_enqueue(dev, xdp, dev_rx);
>  }

Did you give any special consideration to where the hook should be? I'm
asking because my immediate thought was that it should be on flush
(i.e., in bq_xmit_all()), but now that I see this I'm not so sure anymore.
What were your thoughts around this?

-Toke


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map
  2020-05-27  1:09 ` [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map David Ahern
@ 2020-05-27 10:02   ` Toke Høiland-Jørgensen
  2020-05-27 14:03     ` David Ahern
  0 siblings, 1 reply; 20+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-05-27 10:02 UTC (permalink / raw)
  To: David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, David Ahern

David Ahern <dsahern@kernel.org> writes:

> Support SEC("xdp_dm*") as a short cut for loading the program with
> type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.

You're not using this in the selftest; shouldn't you be? Also, the
prefix should be libbpf: not bpftool:, no?

-Toke


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27  1:09 ` [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH David Ahern
@ 2020-05-27 10:26   ` Jesper Dangaard Brouer
  2020-05-27 13:56     ` David Ahern
  2020-05-27 14:27     ` David Ahern
  0 siblings, 2 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2020-05-27 10:26 UTC (permalink / raw)
  To: David Ahern
  Cc: netdev, davem, kuba, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin, dsahern, brouer

On Tue, 26 May 2020 19:09:01 -0600
David Ahern <dsahern@kernel.org> wrote:

> Add support to DEVMAP and DEVMAP_HASH to support 8-byte values as a
> <device index, program id> pair. To do this, a new struct is needed in
> bpf_dtab_netdev to hold the values to return on lookup.
> 
> Signed-off-by: David Ahern <dsahern@kernel.org>
> ---
>  kernel/bpf/devmap.c | 56 ++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 43 insertions(+), 13 deletions(-)
> 
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index a51d9fb7a359..95db6d8beebc 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -60,12 +60,22 @@ struct xdp_dev_bulk_queue {
>  	unsigned int count;
>  };
>  
> +/* devmap value can be dev index or dev index + prog fd/id */
> +struct dev_map_ext_val {
> +	u32 ifindex;	/* must be first for compat with 4-byte values */
> +	union {
> +		int prog_fd;  /* prog fd on write */
> +		u32 prog_id;  /* prog id on read */
> +	};
> +};

This smells like a BTF structure.
Andrii, BTF does support unions, right?


>  struct bpf_dtab_netdev {
>  	struct net_device *dev; /* must be first member, due to tracepoint */
>  	struct hlist_node index_hlist;
>  	struct bpf_dtab *dtab;
>  	struct rcu_head rcu;
>  	unsigned int idx;
> +	struct dev_map_ext_val val;
>  };
>  
>  struct bpf_dtab {
> @@ -108,9 +118,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
>  	u64 cost = 0;
>  	int err;
>  
> -	/* check sanity of attributes */
> +	/* check sanity of attributes. 2 value sizes supported:
> +	 * 4 bytes: ifindex
> +	 * 8 bytes: ifindex + prog fd
> +	 */
>  	if (attr->max_entries == 0 || attr->key_size != 4 ||
> -	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
> +	    (attr->value_size != 4 && attr->value_size != 8) ||

IMHO we really need to leverage BTF here, as I'm sure we need to do more
extensions, and this size matching will get more and more unmaintainable.

With BTF in place, dumping the map via bpftool, will also make the
fields "self-documenting".

I will try to implement something that uses BTF for this case (and cpumap).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 10:26   ` Jesper Dangaard Brouer
@ 2020-05-27 13:56     ` David Ahern
  2020-05-27 14:57       ` Toke Høiland-Jørgensen
  2020-05-27 14:27     ` David Ahern
  1 sibling, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27 13:56 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, David Ahern
  Cc: netdev, davem, kuba, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

On 5/27/20 4:26 AM, Jesper Dangaard Brouer wrote:
>> @@ -108,9 +118,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
>>  	u64 cost = 0;
>>  	int err;
>>  
>> -	/* check sanity of attributes */
>> +	/* check sanity of attributes. 2 value sizes supported:
>> +	 * 4 bytes: ifindex
>> +	 * 8 bytes: ifindex + prog fd
>> +	 */
>>  	if (attr->max_entries == 0 || attr->key_size != 4 ||
>> -	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
>> +	    (attr->value_size != 4 && attr->value_size != 8) ||
> 
> IMHO we really need to leverage BTF here, as I'm sure we need to do more
> extensions, and this size matching will get more and more unmaintainable.
> 
> With BTF in place, dumping the map via bpftool, will also make the
> fields "self-documenting".
> 
> I will try to implement something that uses BTF for this case (and cpumap).
> 

as mentioned in a past response, BTF does not make any fields special
and this code should not assume it either. You need to know precisely
which 4 bytes is the program fd that needs to be looked up, and that
AFAIK is beyond the scope of BTF.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry
  2020-05-27 10:01   ` Toke Høiland-Jørgensen
@ 2020-05-27 14:02     ` David Ahern
  2020-05-27 14:58       ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27 14:02 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

On 5/27/20 4:01 AM, Toke Høiland-Jørgensen wrote:
> Did you give any special consideration to where the hook should be? I'm
> asking because my immediate thought was that it should be on flush
> (i.e., in bq_xmit_all()), but now that I see this I'm not so sure anymore.
> What were your thoughts around this?

I chose this spot for many reasons:

1. dev_map_enqueue has the bpf_dtab_netdev structure which holds the program

2. programs take xdp_buff, and dev_map_enqueue still has the xdp_buff
with the rx information; no need to convert from buff to frame losing rx
data, enqueue, back to buff to run program, back to frame to hand off to
the driver.

3. no sense enqueuing if the device program drops the frame.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map
  2020-05-27 10:02   ` Toke Høiland-Jørgensen
@ 2020-05-27 14:03     ` David Ahern
  2020-05-27 15:01       ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27 14:03 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

On 5/27/20 4:02 AM, Toke Høiland-Jørgensen wrote:
> David Ahern <dsahern@kernel.org> writes:
> 
>> Support SEC("xdp_dm*") as a short cut for loading the program with
>> type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.
> 
> You're not using this in the selftest; shouldn't you be? Also, the
> prefix should be libbpf: not bpftool:, no?
> 

The selftest is exercising kernel APIs - what is allowed and what is not.

Yes, the subject should be libbpf.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 10:26   ` Jesper Dangaard Brouer
  2020-05-27 13:56     ` David Ahern
@ 2020-05-27 14:27     ` David Ahern
  2020-05-27 15:30       ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 20+ messages in thread
From: David Ahern @ 2020-05-27 14:27 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, David Ahern
  Cc: netdev, davem, kuba, toke, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

On 5/27/20 4:26 AM, Jesper Dangaard Brouer wrote:
> IMHO we really need to leverage BTF here, as I'm sure we need to do more
> extensions, and this size matching will get more and more unmaintainable.
> 
> With BTF in place, dumping the map via bpftool, will also make the
> fields "self-documenting".

furthermore, the kernel is changing the value - an fd is passed in and
an id is returned. I do not see how any of this fits into BTF.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 13:56     ` David Ahern
@ 2020-05-27 14:57       ` Toke Høiland-Jørgensen
  2020-05-27 15:24         ` David Ahern
  0 siblings, 1 reply; 20+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-05-27 14:57 UTC (permalink / raw)
  To: David Ahern, Jesper Dangaard Brouer, David Ahern
  Cc: netdev, davem, kuba, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

David Ahern <dsahern@gmail.com> writes:

> On 5/27/20 4:26 AM, Jesper Dangaard Brouer wrote:
>>> @@ -108,9 +118,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
>>>  	u64 cost = 0;
>>>  	int err;
>>>  
>>> -	/* check sanity of attributes */
>>> +	/* check sanity of attributes. 2 value sizes supported:
>>> +	 * 4 bytes: ifindex
>>> +	 * 8 bytes: ifindex + prog fd
>>> +	 */
>>>  	if (attr->max_entries == 0 || attr->key_size != 4 ||
>>> -	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
>>> +	    (attr->value_size != 4 && attr->value_size != 8) ||
>> 
>> IMHO we really need to leverage BTF here, as I'm sure we need to do more
>> extensions, and this size matching will get more and more unmaintainable.
>> 
>> With BTF in place, dumping the map via bpftool, will also make the
>> fields "self-documenting".
>> 
>> I will try to implement something that uses BTF for this case (and cpumap).
>> 
>
> as mentioned in a past response, BTF does not make any fields special
> and this code should not assume it either.

Either way you're creating a contract where the kernel says "first four
bytes is the ifindex, second four bytes is the fd/id". BTF just makes
that explicit, and allows userspace to declare that it agrees this is
what the fields should mean. And gives us more flexibility when
extending the API later than just adding stuff at the end and looking at
the size...

> You need to know precisely which 4 bytes is the program fd that needs
> to be looked up, and that AFAIK is beyond the scope of BTF.

Exactly - BTF is a way for userspace to explicitly tell the kernel "I am
going to put the fd into these four bytes of the value field", instead
of the kernel implicitly assuming it's always bytes 5-8.

-Toke


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry
  2020-05-27 14:02     ` David Ahern
@ 2020-05-27 14:58       ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 20+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-05-27 14:58 UTC (permalink / raw)
  To: David Ahern, David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

David Ahern <dsahern@gmail.com> writes:

> On 5/27/20 4:01 AM, Toke Høiland-Jørgensen wrote:
>> Did you give any special consideration to where the hook should be? I'm
>> asking because my immediate thought was that it should be on flush
>> (i.e., in bq_xmit_all()), but now that I see this I'm not so sure anymore.
>> What were your thoughts around this?
>
> I chose this spot for many reasons:
>
> 1. dev_map_enqueue has the bpf_dtab_netdev structure which holds the program
>
> 2. programs take xdp_buff, and dev_map_enqueue still has the xdp_buff
> with the rx information; no need to convert from buff to frame losing rx
> data, enqueue, back to buff to run program, back to frame to hand off to
> the driver.
>
> 3. no sense enqueuing if the device program drops the frame.

Right, makes sense; thank you for explaining :)

-Toke


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map
  2020-05-27 14:03     ` David Ahern
@ 2020-05-27 15:01       ` Toke Høiland-Jørgensen
  2020-05-27 15:43         ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 20+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-05-27 15:01 UTC (permalink / raw)
  To: David Ahern, David Ahern, netdev
  Cc: davem, kuba, brouer, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

David Ahern <dsahern@gmail.com> writes:

> On 5/27/20 4:02 AM, Toke Høiland-Jørgensen wrote:
>> David Ahern <dsahern@kernel.org> writes:
>> 
>>> Support SEC("xdp_dm*") as a short cut for loading the program with
>>> type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.
>> 
>> You're not using this in the selftest; shouldn't you be? Also, the
>> prefix should be libbpf: not bpftool:, no?
>> 
>
> The selftest is exercising kernel APIs - what is allowed and what is
> not.

Sure, but they also de facto serve as example code for features that are
not documented anywhere else, so just seemed a bit odd to me that you
were not using this to mark the programs.

Anyway, not going to insist if you prefer explicitly setting
expected_attach_type...

-Toke


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 14:57       ` Toke Høiland-Jørgensen
@ 2020-05-27 15:24         ` David Ahern
  0 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2020-05-27 15:24 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, Jesper Dangaard Brouer, David Ahern
  Cc: netdev, davem, kuba, daniel, john.fastabend, ast, kafai,
	songliubraving, yhs, andriin

On 5/27/20 8:57 AM, Toke Høiland-Jørgensen wrote:
> 
> Either way you're creating a contract where the kernel says "first four
> bytes is the ifindex, second four bytes is the fd/id". BTF just makes
> that explicit, and allows userspace to declare that it agrees this is
> what the fields should mean. And gives us more flexibility when
> extending the API later than just adding stuff at the end and looking at
> the size...
> 
>> You need to know precisely which 4 bytes is the program fd that needs
>> to be looked up, and that AFAIK is beyond the scope of BTF.
> 
> Exactly - BTF is a way for userspace to explicitly tell the kernel "I am
> going to put the fd into these four bytes of the value field", instead
> of the kernel implicitly assuming it's always bytes 5-8.
> 

Really, I should define that struct in uapi/linux/bpf.h. The ifindex
field has special meaning: the kernel needs to convert it to a
net_device. The prog_fd field has special meaning: it should map to a
bpf program.

This use case is not in BTF's scope. But, prove me wrong. Ideas are
cheap; code is hard. Show me the code that implements your idea.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 14:27     ` David Ahern
@ 2020-05-27 15:30       ` Jesper Dangaard Brouer
  2020-05-27 18:38         ` David Ahern
  0 siblings, 1 reply; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2020-05-27 15:30 UTC (permalink / raw)
  To: David Ahern
  Cc: David Ahern, netdev, davem, kuba, toke, daniel, john.fastabend,
	ast, kafai, songliubraving, yhs, andriin, brouer

On Wed, 27 May 2020 08:27:36 -0600
David Ahern <dsahern@gmail.com> wrote:

> On 5/27/20 4:26 AM, Jesper Dangaard Brouer wrote:
> > IMHO we really need to leverage BTF here, as I'm sure we need to do more
> > extensions, and this size matching will get more and more unmaintainable.
> > 
> > With BTF in place, dumping the map via bpftool, will also make the
> > fields "self-documenting".  
> 
> furthermore, the kernel is changing the value - an fd is passed in and
> an id is returned. I do not see how any of this fits into BTF.

It can, as BTF actually supports unions (I just tested that).

For the sake of end-users understanding this, I do wonder if it is
better to define the struct without the union, and have longer names
that will be part of BTF description, e.g (dumped via bpftool):

 struct dev_map_ext_val {
        u32 ifindex;
        int bpf_prog_fd_write;
        u32 bpf_prog_id_read;
 };

But a union would also work (also tested via BPF loading and BTF dumping):

 struct dev_map_ext_val {
        u32 ifindex;
        union {
                int bpf_prog_fd_write;
                u32 bpf_prog_id_read;
        };
 };

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map
  2020-05-27 15:01       ` Toke Høiland-Jørgensen
@ 2020-05-27 15:43         ` Jesper Dangaard Brouer
  0 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2020-05-27 15:43 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: David Ahern, David Ahern, netdev, davem, kuba, daniel,
	john.fastabend, ast, kafai, songliubraving, yhs, andriin, brouer

On Wed, 27 May 2020 17:01:10 +0200
Toke Høiland-Jørgensen <toke@redhat.com> wrote:

> David Ahern <dsahern@gmail.com> writes:
> 
> > On 5/27/20 4:02 AM, Toke Høiland-Jørgensen wrote:  
> >> David Ahern <dsahern@kernel.org> writes:
> >>   
> >>> Support SEC("xdp_dm*") as a short cut for loading the program with
> >>> type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.  
> >> 
> >> You're not using this in the selftest; shouldn't you be? Also, the
> >> prefix should be libbpf: not bpftool:, no?
> >>   
> >
> > The selftest is exercising kernel APIs - what is allowed and what is
> > not.  
> 
> Sure, but they also de facto serve as example code for features that are
> not documented anywhere else, so just seemed a bit odd to me that you
> were not using this to mark the programs.
> 
> Anyway, not going to insist if you prefer explicitly setting
> expected_attach_type...

I actually think that it is better to demonstrate that it is possible to set:	

 attr.expected_attach_type = BPF_XDP_DEVMAP;

Because people will be grepping the source code to find examples ;-)

We could add a comment that says SEC("xdp_dm") can be used as a shortcut.
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH
  2020-05-27 15:30       ` Jesper Dangaard Brouer
@ 2020-05-27 18:38         ` David Ahern
  0 siblings, 0 replies; 20+ messages in thread
From: David Ahern @ 2020-05-27 18:38 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David Ahern, netdev, davem, kuba, toke, daniel, john.fastabend,
	ast, kafai, songliubraving, yhs, andriin

On 5/27/20 9:30 AM, Jesper Dangaard Brouer wrote:
> On Wed, 27 May 2020 08:27:36 -0600
> David Ahern <dsahern@gmail.com> wrote:
> 
>> On 5/27/20 4:26 AM, Jesper Dangaard Brouer wrote:
>>> IMHO we really need to leverage BTF here, as I'm sure we need to do more
>>> extensions, and this size matching will get more and more unmaintainable.
>>>
>>> With BTF in place, dumping the map via bpftool, will also make the
>>> fields "self-documenting".  
>>
>> furthermore, the kernel is changing the value - an fd is passed in and
>> an id is returned. I do not see how any of this fits into BTF.
> 
> It can, as BTF actually supports unions (I just tested that).
> 

You are trying to force an arbitrary kernel API with BTF, and it adds no
value over a static struct definition that grows as new capability is added.

DEVMAPs (and CPUMAP) are not like other maps - the fields in the entries
have meaning to *the kernel*. That means the kernel needs to know the
fields and what they represent - be it a device index, a program fd
converted to an id, a cpuid, or any future extension. If you add 'u32
foo' to this interface, you still need kernel code to understand 'foo'
maps to resource 'bar' via lookup function 'f'. You can not do this
dynamically. BTF does not make sense here.

Furthermore, if the kernel does not understand a field then it better
return an error code - EINVAL so the user knows expected functionality
is not supported by that kernel.

> 
> But a union would also work (also tested via BPF loading and BTF dumping):
> 
>  struct dev_map_ext_val {
>         u32 ifindex;
>         union {
>                 int bpf_prog_fd_write;
>                 u32 bpf_prog_id_read;
>         };
>  };
> 

I prefer the union and without the verb; comments convey the same
meaning. The fd has no meaning beyond the process that created the entry
so it does not need to be kept around bloating the interface.

For v2, I moved the struct to uapi/linux/bpf.h and added comments.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-05-27 18:48 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-27  1:09 [PATCH bpf-next 0/5] bpf: Add support for XDP programs in DEVMAP entries David Ahern
2020-05-27  1:09 ` [PATCH bpf-next 1/5] bpf: Handle 8-byte values in DEVMAP and DEVMAP_HASH David Ahern
2020-05-27 10:26   ` Jesper Dangaard Brouer
2020-05-27 13:56     ` David Ahern
2020-05-27 14:57       ` Toke Høiland-Jørgensen
2020-05-27 15:24         ` David Ahern
2020-05-27 14:27     ` David Ahern
2020-05-27 15:30       ` Jesper Dangaard Brouer
2020-05-27 18:38         ` David Ahern
2020-05-27  1:09 ` [PATCH bpf-next 2/5] bpf: Add support to attach bpf program to a devmap entry David Ahern
2020-05-27 10:01   ` Toke Høiland-Jørgensen
2020-05-27 14:02     ` David Ahern
2020-05-27 14:58       ` Toke Høiland-Jørgensen
2020-05-27  1:09 ` [PATCH bpf-next 3/5] xdp: Add xdp_txq_info to xdp_buff David Ahern
2020-05-27  1:09 ` [PATCH bpf-next 4/5] bpftool: Add SEC name for xdp programs attached to device map David Ahern
2020-05-27 10:02   ` Toke Høiland-Jørgensen
2020-05-27 14:03     ` David Ahern
2020-05-27 15:01       ` Toke Høiland-Jørgensen
2020-05-27 15:43         ` Jesper Dangaard Brouer
2020-05-27  1:09 ` [PATCH bpf-next 5/5] selftest: Add tests for XDP programs in devmap entries David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.