From: Daniel Borkmann <daniel@iogearbox.net>
To: Jesper Dangaard Brouer <brouer@redhat.com>,
	netdev@vger.kernel.org, Daniel Borkmann <borkmann@iogearbox.net>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: "Christoph Hellwig" <hch@infradead.org>,
	"Björn Töpel" <bjorn.topel@intel.com>,
	"Magnus Karlsson" <magnus.karlsson@intel.com>,
	makita.toshiaki@lab.ntt.co.jp
Subject: Re: [bpf-next V4 PATCH 2/8] bpf: devmap prepare xdp frames for bulking
Date: Wed, 23 May 2018 11:54:38 +0200	[thread overview]
Message-ID: <1db29c53-1568-a3b5-f2d3-1c830aefe33b@iogearbox.net>
In-Reply-To: <152665048175.21055.15345477060144555643.stgit@firesoul>

On 05/18/2018 03:34 PM, Jesper Dangaard Brouer wrote:
> Like cpumap, create a queue for xdp frames that will be bulked.  For now,
> this patch simply invokes ndo_xdp_xmit for each frame.  This happens
> either when the map flush operation is invoked, or when the limit
> DEV_MAP_BULK_SIZE is reached.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>  kernel/bpf/devmap.c |   77 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 73 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index 808808bf2bf2..cab72c100bb5 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -54,11 +54,18 @@
>  #define DEV_CREATE_FLAG_MASK \
>  	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>  
> +#define DEV_MAP_BULK_SIZE 16
> +struct xdp_bulk_queue {
> +	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
> +	unsigned int count;
> +};
> +
>  /* objects in the map */
>  struct bpf_dtab_netdev {
>  	struct net_device *dev; /* must be first member, due to tracepoint */
>  	struct bpf_dtab *dtab;
>  	unsigned int bit;
> +	struct xdp_bulk_queue __percpu *bulkq;
>  	struct rcu_head rcu;
>  };
>  
> @@ -209,6 +216,38 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
>  	__set_bit(bit, bitmap);
>  }
>  
> +static int bq_xmit_all(struct bpf_dtab_netdev *obj,
> +			 struct xdp_bulk_queue *bq)
> +{
> +	unsigned int processed = 0, drops = 0;
> +	struct net_device *dev = obj->dev;
> +	int i;
> +
> +	if (unlikely(!bq->count))
> +		return 0;
> +
> +	for (i = 0; i < bq->count; i++) {
> +		struct xdp_frame *xdpf = bq->q[i];
> +
> +		prefetch(xdpf);
> +	}
> +
> +	for (i = 0; i < bq->count; i++) {
> +		struct xdp_frame *xdpf = bq->q[i];
> +		int err;
> +
> +		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> +		if (err) {
> +			drops++;
> +			xdp_return_frame(xdpf);
> +		}
> +		processed++;

This sort of thing makes the patch really hard to review: 'processed' and
'drops' are not read anywhere in this function, so I need to go and check
all the other patches to see whether there's further logic involved here
or not. I had to review your series after applying all patches in a local
branch; please never do this. Add the logic in a patch where it's
self-contained and obvious to review.
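
E.g., one way to make it self-contained would be to consume the counters
right in this patch. Rough sketch only; the trace_xdp_devmap_xmit() call
and its arguments below are made up for illustration (the real devmap
tracepoint only shows up later in this series, in patch 3/8):

static int bq_xmit_all(struct bpf_dtab_netdev *obj,
		       struct xdp_bulk_queue *bq)
{
	unsigned int processed = 0, drops = 0;
	struct net_device *dev = obj->dev;
	int i;

	if (unlikely(!bq->count))
		return 0;

	/* Prefetch all frames before touching them in the xmit loop. */
	for (i = 0; i < bq->count; i++)
		prefetch(bq->q[i]);

	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];
		int err;

		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
		if (err) {
			drops++;
			xdp_return_frame(xdpf);
		}
		processed++;
	}
	bq->count = 0;

	/* Hypothetical consumer, so both counters are actually read in
	 * the same patch that introduces them:
	 */
	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, processed, drops);
	return 0;
}

That way a reviewer sees producer and consumer in one hunk.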

> +	}
> +	bq->count = 0;
> +
> +	return 0;
> +}
> +
>  /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
>   * from the driver before returning from its napi->poll() routine. The poll()
>   * routine is called either from busy_poll context or net_rx_action signaled
> @@ -224,6 +263,7 @@ void __dev_map_flush(struct bpf_map *map)
>  
>  	for_each_set_bit(bit, bitmap, map->max_entries) {
>  		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
> +		struct xdp_bulk_queue *bq;
>  		struct net_device *netdev;
>  
>  		/* This is possible if the dev entry is removed by user space
> @@ -233,6 +273,9 @@ void __dev_map_flush(struct bpf_map *map)
>  			continue;
>  
>  		__clear_bit(bit, bitmap);
> +
> +		bq = this_cpu_ptr(dev->bulkq);
> +		bq_xmit_all(dev, bq);
>  		netdev = dev->dev;
>  		if (likely(netdev->netdev_ops->ndo_xdp_flush))
>  			netdev->netdev_ops->ndo_xdp_flush(netdev);
> @@ -255,6 +298,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>  	return obj;
>  }
>  
> +/* Runs under RCU-read-side, plus in softirq under NAPI protection.
> + * Thus, safe percpu variable access.
> + */
> +static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
> +{
> +	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
> +
> +	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
> +		bq_xmit_all(obj, bq);
> +
> +	bq->q[bq->count++] = xdpf;
> +	return 0;
> +}
> +
>  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>  {
>  	struct net_device *dev = dst->dev;
> @@ -268,8 +325,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
>  	if (unlikely(!xdpf))
>  		return -EOVERFLOW;
>  
> -	/* TODO: implement a bulking/enqueue step later */
> -	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
> +	err = bq_enqueue(dst, xdpf);
>  	if (err)
>  		return err;
>  
> @@ -288,13 +344,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
>  {
>  	if (dev->dev->netdev_ops->ndo_xdp_flush) {
>  		struct net_device *fl = dev->dev;
> +		struct xdp_bulk_queue *bq;
>  		unsigned long *bitmap;
> +
>  		int cpu;

Please remove the stray blank line above.
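I.e., the declaration block should simply read:

		struct net_device *fl = dev->dev;
		struct xdp_bulk_queue *bq;
		unsigned long *bitmap;
		int cpu;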

>  		for_each_online_cpu(cpu) {
>  			bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
>  			__clear_bit(dev->bit, bitmap);
>  
> +			bq = per_cpu_ptr(dev->bulkq, cpu);
> +			bq_xmit_all(dev, bq);
> +
>  			fl->netdev_ops->ndo_xdp_flush(dev->dev);
>  		}
>  	}
> @@ -306,6 +367,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
>  
>  	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
>  	dev_map_flush_old(dev);
> +	free_percpu(dev->bulkq);
>  	dev_put(dev->dev);
>  	kfree(dev);
>  }
> @@ -338,6 +400,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
>  {
>  	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
>  	struct net *net = current->nsproxy->net_ns;
> +	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
>  	struct bpf_dtab_netdev *dev, *old_dev;
>  	u32 i = *(u32 *)key;
>  	u32 ifindex = *(u32 *)value;
> @@ -352,11 +415,17 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
>  	if (!ifindex) {
>  		dev = NULL;
>  	} else {
> -		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
> -				   map->numa_node);
> +		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
>  		if (!dev)
>  			return -ENOMEM;
>  
> +		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
> +						sizeof(void *), gfp);
> +		if (!dev->bulkq) {
> +			kfree(dev);
> +			return -ENOMEM;
> +		}
> +
>  		dev->dev = dev_get_by_index(net, ifindex);
>  		if (!dev->dev) {
>  			kfree(dev);

Ahh well, and I just realized that you are leaking memory in the error path here. :(

A free_percpu(dev->bulkq) is missing.
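
I.e., roughly (the return code is unchanged from the existing error path,
which the hunk above cuts off):

		dev->dev = dev_get_by_index(net, ifindex);
		if (!dev->dev) {
			free_percpu(dev->bulkq);	/* this is the missing cleanup */
			kfree(dev);
			return -EINVAL;	/* as in the existing code, elided above */
		}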

Please fix this bug up and send a fresh series, so we can get this in right
away without needing a follow-up in bpf-next.

Thanks,
Daniel

