From: "Björn Töpel" <bjorn.topel@gmail.com>
To: "Toke Høiland-Jørgensen" <toke@redhat.com>
Cc: Netdev <netdev@vger.kernel.org>, bpf <bpf@vger.kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Alexei Starovoitov <ast@kernel.org>,
	David Miller <davem@davemloft.net>,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	John Fastabend <john.fastabend@gmail.com>
Subject: Re: [PATCH bpf-next 1/2] xdp: Move devmap bulk queue into struct net_device
Date: Fri, 10 Jan 2020 16:03:47 +0100
Message-ID: <CAJ+HfNgkouU8=T2+Of1nAfwBQ-eqCKKAqrNzhhEafw5qW8bO_w@mail.gmail.com>
In-Reply-To: <157866612285.432695.6722430952732620313.stgit@toke.dk>

On Fri, 10 Jan 2020 at 15:22, Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> Commit 96360004b862 ("xdp: Make devmap flush_list common for all map
> instances") changed devmap flushing to be a global operation instead of a
> per-map operation. However, the queue structure used for bulking was still
> allocated as part of the containing map.
>
> This patch moves the devmap bulk queue into struct net_device. The
> motivation for this is reusing it for the non-map variant of XDP_REDIRECT,
> which will be changed in a subsequent commit.
>
> We defer the actual allocation of the bulk queue structure until the
> NETDEV_REGISTER notification in devmap.c. This makes it possible to check for
> ndo_xdp_xmit support before allocating the structure, which is not possible
> at the time struct net_device is allocated. However, we keep the freeing in
> free_netdev() to avoid adding another RCU callback on NETDEV_UNREGISTER.
>
> Because of this change, we lose the reference back to the map that
> originated the redirect, so change the tracepoint to always report 0 as the
> map ID and index. Otherwise no functional change is intended with this
> patch.
>

Nice work, Toke!

I'm getting some checkpatch warnings (lines over 80 characters); at least
the kmalloc_node() call in __dev_map_alloc_node() looks over the limit,
see the sketch below.
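
One way to wrap it (untested, just a sketch of the same call):

	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
			   dtab->map.numa_node);

Other than that: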

Acked-by: Björn Töpel <bjorn.topel@intel.com>

> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> ---
>  include/linux/netdevice.h  |    3 ++
>  include/trace/events/xdp.h |    2 +
>  kernel/bpf/devmap.c        |   61 ++++++++++++++++++--------------------------
>  net/core/dev.c             |    2 +
>  4 files changed, 31 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 2741aa35bec6..1b2bc2a7522e 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -876,6 +876,7 @@ enum bpf_netdev_command {
>  struct bpf_prog_offload_ops;
>  struct netlink_ext_ack;
>  struct xdp_umem;
> +struct xdp_dev_bulk_queue;
>
>  struct netdev_bpf {
>         enum bpf_netdev_command command;
> @@ -1993,6 +1994,8 @@ struct net_device {
>         spinlock_t              tx_global_lock;
>         int                     watchdog_timeo;
>
> +       struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
> +
>  #ifdef CONFIG_XPS
>         struct xps_dev_maps __rcu *xps_cpus_map;
>         struct xps_dev_maps __rcu *xps_rxqs_map;
> diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
> index a7378bcd9928..72bad13d4a3c 100644
> --- a/include/trace/events/xdp.h
> +++ b/include/trace/events/xdp.h
> @@ -278,7 +278,7 @@ TRACE_EVENT(xdp_devmap_xmit,
>         ),
>
>         TP_fast_assign(
> -               __entry->map_id         = map->id;
> +               __entry->map_id         = map ? map->id : 0;
>                 __entry->act            = XDP_REDIRECT;
>                 __entry->map_index      = map_index;
>                 __entry->drops          = drops;
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index da9c832fc5c8..bcb05cb6b728 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -53,13 +53,11 @@
>         (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>
>  #define DEV_MAP_BULK_SIZE 16
> -struct bpf_dtab_netdev;
> -
> -struct xdp_bulk_queue {
> +struct xdp_dev_bulk_queue {
>         struct xdp_frame *q[DEV_MAP_BULK_SIZE];
>         struct list_head flush_node;
> +       struct net_device *dev;
>         struct net_device *dev_rx;
> -       struct bpf_dtab_netdev *obj;
>         unsigned int count;
>  };
>
> @@ -67,9 +65,8 @@ struct bpf_dtab_netdev {
>         struct net_device *dev; /* must be first member, due to tracepoint */
>         struct hlist_node index_hlist;
>         struct bpf_dtab *dtab;
> -       struct xdp_bulk_queue __percpu *bulkq;
>         struct rcu_head rcu;
> -       unsigned int idx; /* keep track of map index for tracepoint */
> +       unsigned int idx;
>  };
>
>  struct bpf_dtab {
> @@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map)
>
>                         hlist_for_each_entry_safe(dev, next, head, index_hlist) {
>                                 hlist_del_rcu(&dev->index_hlist);
> -                               free_percpu(dev->bulkq);
>                                 dev_put(dev->dev);
>                                 kfree(dev);
>                         }
> @@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map)
>                         if (!dev)
>                                 continue;
>
> -                       free_percpu(dev->bulkq);
>                         dev_put(dev->dev);
>                         kfree(dev);
>                 }
> @@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
>         return -ENOENT;
>  }
>
> -static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
> +static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
> -       struct bpf_dtab_netdev *obj = bq->obj;
> -       struct net_device *dev = obj->dev;
> +       struct net_device *dev = bq->dev;
>         int sent = 0, drops = 0, err = 0;
>         int i;
>
> @@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
>  out:
>         bq->count = 0;
>
> -       trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
> -                             sent, drops, bq->dev_rx, dev, err);
> +       trace_xdp_devmap_xmit(NULL, 0, sent, drops, bq->dev_rx, dev, err);
>         bq->dev_rx = NULL;
>         __list_del_clearprev(&bq->flush_node);
>         return 0;
> @@ -374,7 +367,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
>  void __dev_map_flush(void)
>  {
>         struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
> -       struct xdp_bulk_queue *bq, *tmp;
> +       struct xdp_dev_bulk_queue *bq, *tmp;
>
>         rcu_read_lock();
>         list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
> @@ -401,12 +394,12 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>  /* Runs under RCU-read-side, plus in softirq under NAPI protection.
>   * Thus, safe percpu variable access.
>   */
> -static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
> +static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>                       struct net_device *dev_rx)
>
>  {
>         struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
> -       struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
> +       struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
>
>         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
>                 bq_xmit_all(bq, 0);
> @@ -444,7 +437,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>         if (unlikely(!xdpf))
>                 return -EOVERFLOW;
>
> -       return bq_enqueue(dst, xdpf, dev_rx);
> +       return bq_enqueue(dev, xdpf, dev_rx);
>  }
>
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
> @@ -483,7 +476,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
>         struct bpf_dtab_netdev *dev;
>
>         dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
> -       free_percpu(dev->bulkq);
>         dev_put(dev->dev);
>         kfree(dev);
>  }
> @@ -538,30 +530,14 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
>                                                     u32 ifindex,
>                                                     unsigned int idx)
>  {
> -       gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
>         struct bpf_dtab_netdev *dev;
> -       struct xdp_bulk_queue *bq;
> -       int cpu;
>
> -       dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
> +       dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, dtab->map.numa_node);
>         if (!dev)
>                 return ERR_PTR(-ENOMEM);
>
> -       dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
> -                                       sizeof(void *), gfp);
> -       if (!dev->bulkq) {
> -               kfree(dev);
> -               return ERR_PTR(-ENOMEM);
> -       }
> -
> -       for_each_possible_cpu(cpu) {
> -               bq = per_cpu_ptr(dev->bulkq, cpu);
> -               bq->obj = dev;
> -       }
> -
>         dev->dev = dev_get_by_index(net, ifindex);
>         if (!dev->dev) {
> -               free_percpu(dev->bulkq);
>                 kfree(dev);
>                 return ERR_PTR(-EINVAL);
>         }
> @@ -721,9 +697,22 @@ static int dev_map_notification(struct notifier_block *notifier,
>  {
>         struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
>         struct bpf_dtab *dtab;
> -       int i;
> +       int i, cpu;
>
>         switch (event) {
> +       case NETDEV_REGISTER:
> +               if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
> +                       break;
> +
> +               /* will be freed in free_netdev() */
> +               netdev->xdp_bulkq = __alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
> +                                                      sizeof(void *), GFP_ATOMIC);
> +               if (!netdev->xdp_bulkq)
> +                       return NOTIFY_BAD;
> +
> +               for_each_possible_cpu(cpu)
> +                       per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
> +               break;
>         case NETDEV_UNREGISTER:
>                 /* This rcu_read_lock/unlock pair is needed because
>                  * dev_map_list is an RCU list AND to ensure a delete
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d99f88c58636..e7802a41ae7f 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -9847,6 +9847,8 @@ void free_netdev(struct net_device *dev)
>
>         free_percpu(dev->pcpu_refcnt);
>         dev->pcpu_refcnt = NULL;
> +       free_percpu(dev->xdp_bulkq);
> +       dev->xdp_bulkq = NULL;
>
>         netdev_unregister_lockdep_key(dev);
>
>
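
With the queue hanging off struct net_device, the enqueue fast path in
bq_enqueue() reduces to roughly this per-CPU pattern (a simplified sketch
condensed from the diff above, not the exact code; the flush_node list
handling is omitted):

	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(bq, 0);
	bq->q[bq->count++] = xdpf;

No map dereference is left on that path, which is what makes the non-map
reuse in patch 2 possible.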
