From: Haiyang Zhang <haiyangz@microsoft.com>
To: linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: haiyangz@microsoft.com, kys@microsoft.com,
	sthemmin@microsoft.com, paulros@microsoft.com,
	shacharr@microsoft.com, olaf@aepfle.de, vkuznets@redhat.com,
	davem@davemloft.net, linux-kernel@vger.kernel.org
Subject: [PATCH net-next] net: mana: Add XDP support
Date: Fri, 19 Nov 2021 16:29:53 -0800
Message-ID: <1637368193-24538-1-git-send-email-haiyangz@microsoft.com>

Add XDP support for the MANA driver.

Supported XDP actions:
	XDP_PASS, XDP_TX, XDP_DROP, XDP_ABORTED

XDP actions not yet supported:
	XDP_REDIRECT
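
As an illustration of the supported actions, a minimal XDP program
like the one below can be used for testing (it is not part of this
patch, and the program and file names are made up). Build it with:
clang -O2 -g -target bpf -c xdp_prog.c -o xdp_prog.o

  /* Parse the Ethernet header, drop IPv4 frames, pass the rest.
   * Any other return value lands in the driver's default
   * (bpf_warn_invalid_xdp_action) branch.
   */
  #include <linux/bpf.h>
  #include <linux/if_ether.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_endian.h>

  SEC("xdp")
  int xdp_drop_ipv4(struct xdp_md *ctx)
  {
  	void *data = (void *)(long)ctx->data;
  	void *data_end = (void *)(long)ctx->data_end;
  	struct ethhdr *eth = data;

  	/* The verifier demands a bounds check before any access */
  	if (data + sizeof(*eth) > data_end)
  		return XDP_ABORTED;

  	if (eth->h_proto == bpf_htons(ETH_P_IP))
  		return XDP_DROP;

  	return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";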

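The object can then be attached through the new ndo_bpf hook, e.g.
with a libbpf loader along these lines ("eth0" is a placeholder
interface name; libbpf API names as of this writing):

  #include <net/if.h>
  #include <linux/if_link.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
  	struct bpf_object *obj;
  	struct bpf_program *prog;
  	int ifindex, prog_fd;

  	ifindex = if_nametoindex("eth0");
  	if (!ifindex)
  		return 1;

  	obj = bpf_object__open_file("xdp_prog.o", NULL);
  	if (libbpf_get_error(obj))
  		return 1;

  	if (bpf_object__load(obj))
  		return 1;

  	prog = bpf_object__find_program_by_name(obj, "xdp_drop_ipv4");
  	if (!prog)
  		return 1;

  	prog_fd = bpf_program__fd(prog);

  	/* Reaches mana_bpf() via the stack's XDP_SETUP_PROG command */
  	if (bpf_set_link_xdp_fd(ifindex, prog_fd, XDP_FLAGS_DRV_MODE))
  		return 1;

  	return 0;
  }
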
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/Makefile  |   2 +-
 drivers/net/ethernet/microsoft/mana/mana.h    |  13 ++
 .../net/ethernet/microsoft/mana/mana_bpf.c    | 159 ++++++++++++++++++
 drivers/net/ethernet/microsoft/mana/mana_en.c |  69 ++++++--
 4 files changed, 232 insertions(+), 11 deletions(-)
 create mode 100644 drivers/net/ethernet/microsoft/mana/mana_bpf.c
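
Note on the MTU check in mana_xdp_set(): with XDP enabled, the packet
headroom, the Ethernet frame, and the skb_shared_info tail must all
fit in a single page. A standalone sketch of that arithmetic, assuming
common x86_64 values (SMP_CACHE_BYTES = 64 and a 320-byte
skb_shared_info; both are config-dependent, not taken from this patch):

  #include <stdio.h>

  /* Mirrors mana_xdp_fraglen()/mana_xdp_set(); the constants are
   * assumed typical values, not taken from the kernel headers. */
  #define XDP_PACKET_HEADROOM	256
  #define ETH_HLEN		14
  #define PAGE_SIZE		4096
  #define SMP_CACHE_BYTES	64
  #define SHINFO_SIZE		320	/* sizeof(struct skb_shared_info) */
  #define SKB_DATA_ALIGN(x)	(((x) + SMP_CACHE_BYTES - 1) & \
  				 ~(SMP_CACHE_BYTES - 1))

  int main(void)
  {
  	unsigned int mtu = 1500;
  	unsigned int buf_max = XDP_PACKET_HEADROOM +
  			       SKB_DATA_ALIGN(mtu + ETH_HLEN) +
  			       SKB_DATA_ALIGN(SHINFO_SIZE);

  	/* 256 + 1536 + 320 = 2112 <= 4096: the default MTU fits */
  	printf("mtu=%u buf_max=%u fits=%d\n",
  	       mtu, buf_max, buf_max <= PAGE_SIZE);
  	return 0;
  }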

diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile
index 0edd5bb685f3..e16a4221f571 100644
--- a/drivers/net/ethernet/microsoft/mana/Makefile
+++ b/drivers/net/ethernet/microsoft/mana/Makefile
@@ -3,4 +3,4 @@
 # Makefile for the Microsoft Azure Network Adapter driver
 
 obj-$(CONFIG_MICROSOFT_MANA) += mana.o
-mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o
+mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o mana_bpf.o
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index d047ee876f12..0c5553887b75 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -298,6 +298,9 @@ struct mana_rxq {
 
 	struct mana_stats stats;
 
+	struct bpf_prog __rcu *bpf_prog;
+	struct xdp_rxq_info xdp_rxq;
+
 	/* MUST BE THE LAST MEMBER:
 	 * Each receive buffer has an associated mana_recv_buf_oob.
 	 */
@@ -353,6 +356,8 @@ struct mana_port_context {
 	/* This points to an array of num_queues of RQ pointers. */
 	struct mana_rxq **rxqs;
 
+	struct bpf_prog *bpf_prog;
+
 	/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
 	unsigned int max_queues;
 	unsigned int num_queues;
@@ -367,6 +372,7 @@ struct mana_port_context {
 	struct mana_ethtool_stats eth_stats;
 };
 
+int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev);
 int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx,
 		    bool update_hash, bool update_tab);
 
@@ -377,6 +383,13 @@ int mana_detach(struct net_device *ndev, bool from_close);
 int mana_probe(struct gdma_dev *gd, bool resuming);
 void mana_remove(struct gdma_dev *gd, bool suspending);
 
+void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
+u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
+		 struct xdp_buff *xdp, void *buf_va, uint pkt_len);
+struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
+void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
+int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
+
 extern const struct ethtool_ops mana_ethtool_ops;
 
 struct mana_obj_spec {
diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
new file mode 100644
index 000000000000..1bc8ff388341
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/mm.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+
+#include "mana.h"
+
+void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+	u16 txq_idx = skb_get_queue_mapping(skb);
+	struct netdev_queue *ndevtxq;
+	int rc;
+
+	__skb_push(skb, ETH_HLEN);
+
+	ndevtxq = netdev_get_tx_queue(ndev, txq_idx);
+	__netif_tx_lock(ndevtxq, smp_processor_id());
+
+	rc = mana_start_xmit(skb, ndev);
+
+	__netif_tx_unlock(ndevtxq);
+
+	if (dev_xmit_complete(rc))
+		return;
+
+	dev_kfree_skb_any(skb);
+	ndev->stats.tx_dropped++;
+}
+
+u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
+		 struct xdp_buff *xdp, void *buf_va, uint pkt_len)
+{
+	struct bpf_prog *prog;
+	u32 act = XDP_PASS;
+
+	rcu_read_lock();
+	prog = rcu_dereference(rxq->bpf_prog);
+
+	if (!prog)
+		goto out;
+
+	xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq);
+	xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false);
+
+	act = bpf_prog_run_xdp(prog, xdp);
+
+	switch (act) {
+	case XDP_PASS:
+	case XDP_TX:
+	case XDP_DROP:
+		break;
+
+	case XDP_ABORTED:
+		trace_xdp_exception(ndev, prog, act);
+		break;
+
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	}
+
+out:
+	rcu_read_unlock();
+
+	return act;
+}
+
+static unsigned int mana_xdp_fraglen(unsigned int len)
+{
+	return SKB_DATA_ALIGN(len) +
+	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+struct bpf_prog *mana_xdp_get(struct mana_port_context *apc)
+{
+	ASSERT_RTNL();
+
+	return apc->bpf_prog;
+}
+
+static struct bpf_prog *mana_chn_xdp_get(struct mana_port_context *apc)
+{
+	return rtnl_dereference(apc->rxqs[0]->bpf_prog);
+}
+
+/* Set XDP program on channels */
+void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog = mana_chn_xdp_get(apc);
+	unsigned int num_queues = apc->num_queues;
+	int i;
+
+	ASSERT_RTNL();
+
+	if (old_prog == prog)
+		return;
+
+	if (prog)
+		bpf_prog_add(prog, num_queues);
+
+	for (i = 0; i < num_queues; i++)
+		rcu_assign_pointer(apc->rxqs[i]->bpf_prog, prog);
+
+	if (old_prog)
+		for (i = 0; i < num_queues; i++)
+			bpf_prog_put(old_prog);
+}
+
+static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
+			struct netlink_ext_ack *extack)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	struct bpf_prog *old_prog;
+	int buf_max;
+
+	old_prog = mana_xdp_get(apc);
+
+	if (!old_prog && !prog)
+		return 0;
+
+	buf_max = XDP_PACKET_HEADROOM + mana_xdp_fraglen(ndev->mtu + ETH_HLEN);
+	if (prog && buf_max > PAGE_SIZE) {
+		netdev_err(ndev, "XDP: mtu:%u too large, buf_max:%u\n",
+			   ndev->mtu, buf_max);
+		NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
+
+		return -EOPNOTSUPP;
+	}
+
+	/* One refcnt of the prog is held by the caller already, so
+	 * don't increase refcnt for this one.
+	 */
+	apc->bpf_prog = prog;
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (apc->port_is_up)
+		mana_chn_setxdp(apc, prog);
+
+	return 0;
+}
+
+int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
+{
+	struct netlink_ext_ack *extack = bpf->extack;
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return mana_xdp_set(ndev, bpf->prog, extack);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 72cbf45c42d8..c1d5a374b967 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -125,7 +125,7 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
 	return -ENOMEM;
 }
 
-static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
 	struct mana_port_context *apc = netdev_priv(ndev);
@@ -378,6 +378,7 @@ static const struct net_device_ops mana_devops = {
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
+	.ndo_bpf		= mana_bpf,
 };
 
 static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -906,6 +907,25 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
 	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
 }
 
+static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
+				      struct xdp_buff *xdp)
+{
+	struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
+
+	if (!skb)
+		return NULL;
+
+	if (xdp->data_hard_start) {
+		skb_reserve(skb, xdp->data - xdp->data_hard_start);
+		skb_put(skb, xdp->data_end - xdp->data);
+	} else {
+		skb_reserve(skb, XDP_PACKET_HEADROOM);
+		skb_put(skb, pkt_len);
+	}
+
+	return skb;
+}
+
 static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 			struct mana_rxq *rxq)
 {
@@ -914,8 +934,10 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 	uint pkt_len = cqe->ppi[0].pkt_len;
 	u16 rxq_idx = rxq->rxq_idx;
 	struct napi_struct *napi;
+	struct xdp_buff xdp = {};
 	struct sk_buff *skb;
 	u32 hash_value;
+	u32 act;
 
 	rxq->rx_cq.work_done++;
 	napi = &rxq->rx_cq.napi;
@@ -925,15 +947,16 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 		return;
 	}
 
-	skb = build_skb(buf_va, PAGE_SIZE);
+	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
 
-	if (!skb) {
-		free_page((unsigned long)buf_va);
-		++ndev->stats.rx_dropped;
-		return;
-	}
+	if (act != XDP_PASS && act != XDP_TX)
+		goto drop;
+
+	skb = mana_build_skb(buf_va, pkt_len, &xdp);
+
+	if (!skb)
+		goto drop;
 
-	skb_put(skb, pkt_len);
 	skb->dev = napi->dev;
 
 	skb->protocol = eth_type_trans(skb, ndev);
@@ -954,12 +977,24 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
 	}
 
+	if (act == XDP_TX) {
+		skb_set_queue_mapping(skb, rxq_idx);
+		mana_xdp_tx(skb, ndev);
+		return;
+	}
+
 	napi_gro_receive(napi, skb);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
 	rx_stats->bytes += pkt_len;
 	u64_stats_update_end(&rx_stats->syncp);
+	return;
+
+drop:
+	free_page((unsigned long)buf_va);
+	++ndev->stats.rx_dropped;
+	return;
 }
 
 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
@@ -1016,7 +1051,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
 	new_page = alloc_page(GFP_ATOMIC);
 
 	if (new_page) {
-		da = dma_map_page(dev, new_page, 0, rxq->datasize,
+		da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
 				  DMA_FROM_DEVICE);
 
 		if (dma_mapping_error(dev, da)) {
@@ -1291,6 +1326,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 		napi_synchronize(napi);
 
 	napi_disable(napi);
+
+	xdp_rxq_info_unreg(&rxq->xdp_rxq);
+
 	netif_napi_del(napi);
 
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
@@ -1342,7 +1380,8 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
 		if (!page)
 			return -ENOMEM;
 
-		da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE);
+		da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
+				  DMA_FROM_DEVICE);
 
 		if (dma_mapping_error(dev, da)) {
 			__free_page(page);
@@ -1485,6 +1524,12 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
 	netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+
+	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
+				 cq->napi.napi_id));
+	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
+					   MEM_TYPE_PAGE_SHARED, NULL));
+
 	napi_enable(&cq->napi);
 
 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
@@ -1650,6 +1695,8 @@ int mana_alloc_queues(struct net_device *ndev)
 	if (err)
 		goto destroy_vport;
 
+	mana_chn_setxdp(apc, mana_xdp_get(apc));
+
 	return 0;
 
 destroy_vport:
@@ -1698,6 +1745,8 @@ static int mana_dealloc_queues(struct net_device *ndev)
 	if (apc->port_is_up)
 		return -EINVAL;
 
+	mana_chn_setxdp(apc, NULL);
+
 	/* No packet can be transmitted now since apc->port_is_up is false.
 	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
 	 * a txq because it may not timely see apc->port_is_up being cleared
-- 
2.25.1

