Linux-HyperV Archive on lore.kernel.org
 help / color / Atom feed
From: Jakub Kicinski <jakub.kicinski@netronome.com>
To: Haiyang Zhang <haiyangz@microsoft.com>
Cc: "sashal@kernel.org" <sashal@kernel.org>,
	"linux-hyperv@vger.kernel.org" <linux-hyperv@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	KY Srinivasan <kys@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	"olaf@aepfle.de" <olaf@aepfle.de>, vkuznets <vkuznets@redhat.com>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH net-next, 3/4] hv_netvsc: Add XDP support
Date: Mon, 28 Oct 2019 14:33:22 -0700
Message-ID: <20191028143322.45d81da4@cakuba.hsd1.ca.comcast.net> (raw)
In-Reply-To: <1572296801-4789-4-git-send-email-haiyangz@microsoft.com>

On Mon, 28 Oct 2019 21:07:04 +0000, Haiyang Zhang wrote:
> This patch adds support of XDP in native mode for hv_netvsc driver, and
> transparently sets the XDP program on the associated VF NIC as well.
> 
> XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO
> before running XDP:
>         ethtool -K eth0 lro off
> 
> XDP actions not yet supported:
>         XDP_TX, XDP_REDIRECT

I don't think we want to merge support without at least XDP_TX these
days.

And without the ability to prepend headers this may be the least
complete initial XDP implementation we've seen :(

> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index d22a36f..688487b 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -122,8 +122,10 @@ static void free_netvsc_device(struct rcu_head *head)
>  	vfree(nvdev->send_buf);
>  	kfree(nvdev->send_section_map);
>  
> -	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
> +	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
> +		xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
>  		vfree(nvdev->chan_table[i].mrc.slots);
> +	}
>  
>  	kfree(nvdev);
>  }
> @@ -1370,6 +1372,10 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
>  		nvchan->net_device = net_device;
>  		u64_stats_init(&nvchan->tx_stats.syncp);
>  		u64_stats_init(&nvchan->rx_stats.syncp);
> +
> +		xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i);
> +		xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
> +					   MEM_TYPE_PAGE_SHARED, NULL);

These can fail.

>  	}
>  
>  	/* Enable NAPI handler before init callbacks */
> diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
> new file mode 100644
> index 0000000..4d235ac
> --- /dev/null
> +++ b/drivers/net/hyperv/netvsc_bpf.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2019, Microsoft Corporation.
> + *
> + * Author:
> + *   Haiyang Zhang <haiyangz@microsoft.com>
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/netdevice.h>
> +#include <linux/etherdevice.h>
> +#include <linux/ethtool.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> +#include <linux/kernel.h>
> +#include <net/xdp.h>
> +
> +#include <linux/mutex.h>
> +#include <linux/rtnetlink.h>
> +
> +#include "hyperv_net.h"
> +
> +u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
> +		   void **p_pbuf)
> +{
> +	struct page *page = NULL;
> +	void *data = nvchan->rsc.data[0];
> +	u32 len = nvchan->rsc.len[0];
> +	void *pbuf = data;
> +	struct bpf_prog *prog;
> +	struct xdp_buff xdp;
> +	u32 act = XDP_PASS;
> +
> +	*p_pbuf = NULL;
> +
> +	rcu_read_lock();
> +	prog = rcu_dereference(nvchan->bpf_prog);
> +
> +	if (!prog || nvchan->rsc.cnt > 1)

Can rsc.cnt == 1 not be ensured at setup time? This looks quite
limiting if random frames could be forced to bypass the filter.

> +		goto out;
> +
> +	/* copy to a new page buffer if data are not within a page */
> +	if (virt_to_page(data) != virt_to_page(data + len - 1)) {
> +		page = alloc_page(GFP_ATOMIC);
> +		if (!page)
> +			goto out;

Returning XDP_PASS on allocation failure seems highly questionable.

> +		pbuf = page_address(page);
> +		memcpy(pbuf, nvchan->rsc.data[0], len);
> +
> +		*p_pbuf = pbuf;
> +	}
> +
> +	xdp.data_hard_start = pbuf;
> +	xdp.data = xdp.data_hard_start;

This patch also doesn't add any headroom for XDP to prepend data :(

> +	xdp_set_data_meta_invalid(&xdp);
> +	xdp.data_end = xdp.data + len;
> +	xdp.rxq = &nvchan->xdp_rxq;
> +	xdp.handle = 0;
> +
> +	act = bpf_prog_run_xdp(prog, &xdp);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		/* Pass to upper layers */
> +		break;
> +
> +	case XDP_ABORTED:
> +		trace_xdp_exception(ndev, prog, act);
> +		break;
> +
> +	case XDP_DROP:
> +		break;
> +
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +	}
> +
> +out:
> +	rcu_read_unlock();
> +
> +	if (page && act != XDP_PASS) {
> +		*p_pbuf = NULL;
> +		__free_page(page);
> +	}
> +
> +	return act;
> +}
> +
> +unsigned int netvsc_xdp_fraglen(unsigned int len)
> +{
> +	return SKB_DATA_ALIGN(len) +
> +	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> +}
> +
> +struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
> +{
> +	return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
> +}
> +
> +int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> +		   struct netvsc_device *nvdev)
> +{
> +	struct bpf_prog *old_prog;
> +	int frag_max, i;
> +
> +	old_prog = netvsc_xdp_get(nvdev);
> +
> +	if (!old_prog && !prog)
> +		return 0;

I think this case is now handled by the core.

> +	frag_max = netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
> +	if (prog && frag_max > PAGE_SIZE) {
> +		netdev_err(dev, "XDP: mtu:%u too large, frag:%u\n",
> +			   dev->mtu, frag_max);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (prog && (dev->features & NETIF_F_LRO)) {
> +		netdev_err(dev, "XDP: not support LRO\n");

Please report this via extack, that way users will see it in the console
in which they're installing the program.

> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (prog) {
> +		prog = bpf_prog_add(prog, nvdev->num_chn);
> +		if (IS_ERR(prog))
> +			return PTR_ERR(prog);
> +	}
> +
> +	for (i = 0; i < nvdev->num_chn; i++)
> +		rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);
> +
> +	if (old_prog)
> +		for (i = 0; i < nvdev->num_chn; i++)
> +			bpf_prog_put(old_prog);
> +
> +	return 0;
> +}
> +
> +int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
> +{
> +	struct netdev_bpf xdp;
> +	bpf_op_t ndo_bpf;
> +
> +	ASSERT_RTNL();
> +
> +	if (!vf_netdev)
> +		return 0;
> +
> +	ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
> +	if (!ndo_bpf)
> +		return 0;
> +
> +	memset(&xdp, 0, sizeof(xdp));
> +
> +	xdp.command = XDP_SETUP_PROG;
> +	xdp.prog = prog;
> +
> +	return ndo_bpf(vf_netdev, &xdp);

IMHO the automatic propagation is not a good idea, especially if the
propagation doesn't make the entire installation fail when the VF
doesn't have ndo_bpf.

> +}

  reply index

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-28 21:06 [PATCH net-next, 0/4] hv_netvsc: Add XDP support and some error handling fixes Haiyang Zhang
2019-10-28 21:07 ` [PATCH net-next, 1/4] hv_netvsc: Fix error handling in netvsc_set_features() Haiyang Zhang
2019-10-28 21:07 ` [PATCH net-next, 2/4] hv_netvsc: Fix error handling in netvsc_attach() Haiyang Zhang
2019-11-01 20:42   ` Markus Elfring
2019-11-04 15:08     ` Haiyang Zhang
2019-10-28 21:07 ` [PATCH net-next, 3/4] hv_netvsc: Add XDP support Haiyang Zhang
2019-10-28 21:33   ` Jakub Kicinski [this message]
2019-10-29 19:17     ` Haiyang Zhang
2019-10-29 19:53       ` Jakub Kicinski
2019-10-29 20:01         ` Haiyang Zhang
2019-10-29 21:59       ` Stephen Hemminger
2019-10-29 22:08         ` Haiyang Zhang
2019-10-28 21:07 ` [PATCH net-next, 4/4] hv_netvsc: Update document for " Haiyang Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191028143322.45d81da4@cakuba.hsd1.ca.comcast.net \
    --to=jakub.kicinski@netronome.com \
    --cc=davem@davemloft.net \
    --cc=haiyangz@microsoft.com \
    --cc=kys@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=olaf@aepfle.de \
    --cc=sashal@kernel.org \
    --cc=sthemmin@microsoft.com \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-HyperV Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-hyperv/0 linux-hyperv/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-hyperv linux-hyperv/ https://lore.kernel.org/linux-hyperv \
		linux-hyperv@vger.kernel.org
	public-inbox-index linux-hyperv

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-hyperv


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git