Re: [PATCH bpf-next 2/7] bpf, xdp: add bpf_link-based XDP attachment API

* Re: [PATCH bpf-next 2/7] bpf, xdp: add bpf_link-based XDP attachment API
       [not found] ` <20200710224924.4087399-3-andriin@fb.com>
@ 2020-07-13 14:19   ` David Ahern
  2020-07-13 22:33     ` Andrii Nakryiko
  2020-07-14 13:57   ` Toke Høiland-Jørgensen
  1 sibling, 1 reply; 15+ messages in thread
From: David Ahern @ 2020-07-13 14:19 UTC (permalink / raw)
  To: Andrii Nakryiko, bpf, netdev, ast, daniel
  Cc: andrii.nakryiko, kernel-team, Jakub Kicinski, Andrey Ignatov,
	Takshak Chahande, Toke Høiland-Jørgensen

On 7/10/20 4:49 PM, Andrii Nakryiko wrote:
> Add bpf_link-based API (bpf_xdp_link) to attach BPF XDP program through
> BPF_LINK_CREATE command.
> 
> bpf_xdp_link is mutually exclusive with direct BPF program attachment,
> previous BPF program should be detached prior to attempting to create a new
> bpf_xdp_link attachment (for a given XDP mode). Once link is attached, it
> can't be replaced by other BPF program attachment or link attachment. It will
> be detached only when the last BPF link FD is closed.
> 
> bpf_xdp_link will be auto-detached when net_device is shutdown, similarly to
> how other BPF links behave (cgroup, flow_dissector). At that point bpf_link
> will become defunct, but won't be destroyed until last FD is closed.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---
>  include/linux/netdevice.h |   6 +
>  include/uapi/linux/bpf.h  |   7 +-
>  kernel/bpf/syscall.c      |   5 +
>  net/core/dev.c            | 385 ++++++++++++++++++++++++++++----------

That's big diff for 1 patch. A fair bit of is refactoring / code
movement that can be done in a separate refactoring patch making it
cleaer what changes you need for the bpf_link piece.

>  4 files changed, 301 insertions(+), 102 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d5630e535836..93bcd81d645d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -886,6 +886,7 @@ struct bpf_prog_offload_ops;
>  struct netlink_ext_ack;
>  struct xdp_umem;
>  struct xdp_dev_bulk_queue;
> +struct bpf_xdp_link;
>  
>  enum bpf_xdp_mode {
>  	XDP_MODE_SKB = 0,
> @@ -896,6 +897,7 @@ enum bpf_xdp_mode {
>  
>  struct bpf_xdp_entity {
>  	struct bpf_prog *prog;
> +	struct bpf_xdp_link *link;
>  };
>  
>  struct netdev_bpf {
> @@ -3824,6 +3826,10 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
>  int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
>  		      int fd, int expected_fd, u32 flags);
>  u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
> +
> +struct bpf_xdp_link;

already stated above.

> +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> +
>  int xdp_umem_query(struct net_device *dev, u16 queue_id);
>  
>  int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 548a749aebb3..41eba148217b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -227,6 +227,7 @@ enum bpf_attach_type {
>  	BPF_CGROUP_INET6_GETSOCKNAME,
>  	BPF_XDP_DEVMAP,
>  	BPF_CGROUP_INET_SOCK_RELEASE,
> +	BPF_XDP,

This really does not add value for the uapi. The link_type uniquely
identifies the type and the expected program type.

>  	__MAX_BPF_ATTACH_TYPE
>  };
>  
> @@ -239,6 +240,7 @@ enum bpf_link_type {
>  	BPF_LINK_TYPE_CGROUP = 3,
>  	BPF_LINK_TYPE_ITER = 4,
>  	BPF_LINK_TYPE_NETNS = 5,
> +	BPF_LINK_TYPE_XDP = 6,
>  
>  	MAX_BPF_LINK_TYPE,
>  };
> @@ -604,7 +606,10 @@ union bpf_attr {
>  
>  	struct { /* struct used by BPF_LINK_CREATE command */
>  		__u32		prog_fd;	/* eBPF program to attach */
> -		__u32		target_fd;	/* object to attach to */
> +		union {
> +			__u32		target_fd;	/* object to attach to */
> +			__u32		target_ifindex; /* target ifindex */
> +		};
>  		__u32		attach_type;	/* attach type */
>  		__u32		flags;		/* extra flags */
>  	} link_create;
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 156f51ffada2..eb4ed4b29418 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2817,6 +2817,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
>  		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
>  	case BPF_TRACE_ITER:
>  		return BPF_PROG_TYPE_TRACING;
> +	case BPF_XDP:
> +		return BPF_PROG_TYPE_XDP;
>  	default:
>  		return BPF_PROG_TYPE_UNSPEC;
>  	}
> @@ -3890,6 +3892,9 @@ static int link_create(union bpf_attr *attr)
>  	case BPF_PROG_TYPE_FLOW_DISSECTOR:
>  		ret = netns_bpf_link_create(attr, prog);
>  		break;
> +	case BPF_PROG_TYPE_XDP:
> +		ret = bpf_xdp_link_attach(attr, prog);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  	}
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d3b82b664e2d..84f755a1ec36 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8713,8 +8713,47 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
>  }
>  EXPORT_SYMBOL(dev_change_proto_down_generic);
>  
> -static struct bpf_prog *dev_xdp_prog(struct net_device *dev, enum bpf_xdp_mode mode)
> +struct bpf_xdp_link {
> +	struct bpf_link link;
> +	struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
> +	int flags;
> +};
> +
> +static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
> +{
> +	if (flags & XDP_FLAGS_HW_MODE)
> +		return XDP_MODE_HW;
> +	if (flags & XDP_FLAGS_DRV_MODE)
> +		return XDP_MODE_DRV;
> +	return XDP_MODE_SKB;
> +}
> +
> +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
>  {
> +	switch (mode) {
> +	case XDP_MODE_SKB:
> +		return generic_xdp_install;
> +	case XDP_MODE_DRV:
> +	case XDP_MODE_HW:
> +		return dev->netdev_ops->ndo_bpf;
> +	default:
> +		return NULL;
> +	};
> +}
> +
> +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
> +					 enum bpf_xdp_mode mode)
> +{
> +	return dev->xdp_state[mode].link;
> +}
> +
> +static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
> +				     enum bpf_xdp_mode mode)
> +{
> +	struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
> +
> +	if (link)
> +		return link->link.prog;
>  	return dev->xdp_state[mode].prog;
>  }
>  
> @@ -8725,9 +8764,17 @@ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
>  	return prog ? prog->aux->id : 0;
>  }
>  
> +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
> +			     struct bpf_xdp_link *link)
> +{
> +	dev->xdp_state[mode].link = link;
> +	dev->xdp_state[mode].prog = NULL;
> +}
> +
>  static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
>  			     struct bpf_prog *prog)
>  {
> +	dev->xdp_state[mode].link = NULL;
>  	dev->xdp_state[mode].prog = prog;
>  }
>  
> @@ -8744,6 +8791,14 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
>  	xdp.flags = flags;
>  	xdp.prog = prog;
>  
> +	/* Drivers assume refcnt is already incremented (i.e, prog pointer is
> +	 * "moved" into driver), so they don't increment it on their own, but
> +	 * they do decrement refcnt when program is detached or replaced.
> +	 * Given net_device also owns link/prog, we need to bump refcnt here
> +	 * to prevent drivers from underflowing it.
> +	 */
> +	if (prog)
> +		bpf_prog_inc(prog);

Why is this refcnt bump not needed today but is needed for your change?

>  	err = bpf_op(dev, &xdp);
>  	if (err)
>  		return err;

and the error path is not decrementing it.

> @@ -8756,39 +8811,221 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
>  
>  static void dev_xdp_uninstall(struct net_device *dev)
>  {
> -	bpf_op_t ndo_bpf;
> +	struct bpf_xdp_link *link;
> +	struct bpf_prog *prog;
> +	enum bpf_xdp_mode mode;
> +	bpf_op_t bpf_op;
>  
> -	/* Remove generic XDP */
> -	WARN_ON(dev_xdp_install(dev, XDP_MODE_SKB, generic_xdp_install,
> -				NULL, 0, NULL));
> -	dev_xdp_set_prog(dev, XDP_MODE_SKB, NULL);
> +	ASSERT_RTNL();
>  
> -	/* Remove from the driver */
> -	ndo_bpf = dev->netdev_ops->ndo_bpf;
> -	if (!ndo_bpf)
> -		return;
> +	for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
> +		prog = dev_xdp_prog(dev, mode);
> +		if (!prog)
> +			continue;
> +
> +		bpf_op = dev_xdp_bpf_op(dev, mode);
> +		WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
> +
> +		/* auto-detach link from net device */
> +		link = dev_xdp_link(dev, mode);
> +		if (link)
> +			link->dev = NULL;
> +		else
> +			bpf_prog_put(prog);
> +
> +		dev_xdp_set_link(dev, mode, NULL);
> +	}
> +}
> +
> +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
> +			  struct bpf_xdp_link *link, struct bpf_prog *new_prog,
> +			  struct bpf_prog *old_prog, u32 flags)
> +{
> +	struct bpf_prog *cur_prog;
> +	enum bpf_xdp_mode mode;
> +	bpf_op_t bpf_op;
> +	int err;
> +
> +	ASSERT_RTNL();
>  
> -	if (dev_xdp_prog_id(dev, XDP_MODE_DRV)) {
> -		WARN_ON(dev_xdp_install(dev, XDP_MODE_DRV, ndo_bpf,
> -					NULL, 0, NULL));
> -		dev_xdp_set_prog(dev, XDP_MODE_DRV, NULL);
> +	/* link supports only XDP mode flags */
> +	if (link && (flags & ~XDP_FLAGS_MODES))
> +		return -EINVAL;

everyone of the -errno returns needs an extack message explaining why

^ permalink raw reply	[flat|nested] 15+ messages in thread