DPDK-dev Archive on lore.kernel.org
 help / color / Atom feed
* [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd
@ 2019-05-15  8:38 Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-15  8:38 UTC (permalink / raw)
  Cc: Qi Zhang, Karlsson Magnus, Topel Bjorn, dev, Xiaolong Ye

Hi,

This patch series mainly includes 3 new features for AF_XDP pmd. They
are separated independent features, the reason I take them in one
patchset is that they have code dependency.

1. zero copy

This patch enables `zero copy` between af_xdp umem and mbuf by using
external mbuf mechanism.

2. multi-queue

With mutli-queue support, one AF_XDP pmd instance can use multi netdev
queues.

3. busy-poll

With busy-poll, all processing occurs on a single core, performance is
better from a per-core perspective.

This patch has dependency on busy-poll support in kernel side and now it is in 
RFC stage [1].

[1] https://www.spinics.net/lists/netdev/msg568337.html 

Xiaolong Ye (3):
  net/af_xdp: enable zero copy by extbuf
  net/af_xdp: add multi-queue support
  net/af_xdp: add busy poll support

 doc/guides/nics/af_xdp.rst          |   5 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c | 210 +++++++++++++++++++++-------
 2 files changed, 160 insertions(+), 55 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf
  2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
@ 2019-05-15  8:38 ` Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-15  8:38 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
  Cc: Karlsson Magnus, Topel Bjorn, dev

Implement zero copy of af_xdp pmd through mbuf's external memory
mechanism to achieve high performance.

This patch also provides a new parameter "pmd_zero_copy" for user, so they
can choose to enable zero copy of af_xdp pmd or not.

To be clear, "zero copy" here is different from the "zero copy mode" of
AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
application.

Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
Suggested-by: Olivier Matz <olivier.matz@6wind.com>
Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |  1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 96 +++++++++++++++++++++++------
 2 files changed, 77 insertions(+), 20 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 175038153..0bd4239fe 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``queue`` - netdev queue id (optional, default 0);
+*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 35c72272c..ebef7bf34 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -70,6 +70,7 @@ struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
+	int pmd_zc;
 };
 
 struct rx_stats {
@@ -109,6 +110,7 @@ struct pmd_internals {
 	int if_index;
 	char if_name[IFNAMSIZ];
 	uint16_t queue_idx;
+	int pmd_zc;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
@@ -119,10 +121,12 @@ struct pmd_internals {
 
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
+#define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
 
@@ -166,6 +170,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 	return 0;
 }
 
+static void
+umem_buf_release_to_fq(void *addr, void *opaque)
+{
+	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
+	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
+
+	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
+}
+
 static uint16_t
 eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -175,6 +188,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
+	int pmd_zc = umem->pmd_zc;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 	unsigned long dropped = 0;
 	unsigned long rx_bytes = 0;
@@ -197,19 +211,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint64_t addr;
 		uint32_t len;
 		void *pkt;
+		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
+		struct rte_mbuf_ext_shared_info *shinfo;
 
 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
 		addr = desc->addr;
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
-		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
+		if (pmd_zc) {
+			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
+					&buf_len, umem_buf_release_to_fq, umem);
+
+			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
+						  shinfo);
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
+							pkt, len);
+			rte_ring_enqueue(umem->buf_ring, (void *)addr);
+		}
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
 		rx_bytes += len;
 		bufs[i] = mbufs[i];
-
-		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 	}
 
 	xsk_ring_cons__release(rx, rcvd);
@@ -262,12 +286,21 @@ kick_tx(struct pkt_tx_queue *txq)
 	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 }
 
+static inline bool
+in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
+{
+	uint64_t mz_base_addr = umem->mz->addr_64;
+
+	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
+}
+
 static uint16_t
 eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
@@ -294,16 +327,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
 		mbuf = bufs[i];
-
-		desc->addr = (uint64_t)addrs[i];
 		desc->len = mbuf->pkt_len;
-		pkt = xsk_umem__get_data(umem->mz->addr,
-					 desc->addr);
-		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-			   desc->len);
-		tx_bytes += mbuf->pkt_len;
 
-		rte_pktmbuf_free(mbuf);
+		/*
+		 * We need to make sure the external mbuf address is within
+		 * current port's umem memzone range
+		 */
+		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
+				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
+			desc->addr = (uint64_t)mbuf->buf_addr -
+				umem->mz->addr_64;
+			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
+					(uint64_t)addrs[i]);
+		} else {
+			desc->addr = (uint64_t)addrs[i];
+			pkt = xsk_umem__get_data(umem->mz->addr,
+					desc->addr);
+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+					desc->len);
+		}
+		tx_bytes += mbuf->pkt_len;
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -313,6 +356,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	txq->stats.tx_pkts += nb_pkts;
 	txq->stats.tx_bytes += tx_bytes;
 
+	for (i = 0; i < nb_pkts; i++)
+		rte_pktmbuf_free(bufs[i]);
+
 	return nb_pkts;
 }
 
@@ -640,6 +686,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	}
 
 	internals->umem = rxq->umem;
+	internals->umem->pmd_zc = internals->pmd_zc;
+
+	if (internals->umem->pmd_zc)
+		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
 
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
@@ -775,9 +825,8 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist,
-		 char *if_name,
-		 int *queue_idx)
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
+				int *pmd_zc)
 {
 	int ret;
 
@@ -791,6 +840,11 @@ parse_parameters(struct rte_kvargs *kvlist,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
+				 &parse_integer_arg, pmd_zc);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -827,9 +881,8 @@ get_iface_info(const char *if_name,
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev,
-	       const char *if_name,
-	       int queue_idx)
+init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
+					int pmd_zc)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -843,6 +896,7 @@ init_internals(struct rte_vdev_device *dev,
 		return NULL;
 
 	internals->queue_idx = queue_idx;
+	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
@@ -883,6 +937,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
+	int pmd_zc = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -909,7 +964,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
-	if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) {
+	if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -919,7 +974,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
-	eth_dev = init_internals(dev, if_name, xsk_queue_idx);
+	eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -961,7 +1016,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
 RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
-			      "queue=<int> ");
+			      "queue=<int> "
+			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
 {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support
  2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
@ 2019-05-15  8:38 ` Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support Xiaolong Ye
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
  3 siblings, 0 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-15  8:38 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
  Cc: Karlsson Magnus, Topel Bjorn, dev

This patch adds two parameters `start_queue` and `queue_count` to
specify the range of netdev queues used by AF_XDP pmd.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |  3 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c | 88 +++++++++++++++--------------
 2 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 0bd4239fe..18defcda3 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -27,7 +27,8 @@ Options
 The following options can be provided to set up an af_xdp port in DPDK.
 
 *   ``iface`` - name of the Kernel interface to attach to (required);
-*   ``queue`` - netdev queue id (optional, default 0);
+*   ``start_queue`` - starting netdev queue id (optional, default 0);
+*   ``queue_count`` - total netdev queue number (optional, default 1);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index ebef7bf34..9a4510701 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -57,7 +57,8 @@ static int af_xdp_logtype;
 #define ETH_AF_XDP_NUM_BUFFERS		4096
 #define ETH_AF_XDP_DATA_HEADROOM	0
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
-#define ETH_AF_XDP_DFLT_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
 
 #define ETH_AF_XDP_RX_BATCH_SIZE	32
 #define ETH_AF_XDP_TX_BATCH_SIZE	32
@@ -88,7 +89,7 @@ struct pkt_rx_queue {
 	struct rx_stats stats;
 
 	struct pkt_tx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct tx_stats {
@@ -103,13 +104,15 @@ struct pkt_tx_queue {
 	struct tx_stats stats;
 
 	struct pkt_rx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct pmd_internals {
 	int if_index;
 	char if_name[IFNAMSIZ];
-	uint16_t queue_idx;
+	int start_queue_idx;
+	int queue_cnt;
+
 	int pmd_zc;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
@@ -120,12 +123,14 @@ struct pmd_internals {
 };
 
 #define ETH_AF_XDP_IFACE_ARG			"iface"
-#define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
+#define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
+#define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
-	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_START_QUEUE_ARG,
+	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
@@ -395,8 +400,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->if_index = internals->if_index;
 	dev_info->max_mac_addrs = 1;
 	dev_info->max_rx_pktlen = ETH_FRAME_LEN;
-	dev_info->max_rx_queues = 1;
-	dev_info->max_tx_queues = 1;
+	dev_info->max_rx_queues = internals->queue_cnt;
+	dev_info->max_tx_queues = internals->queue_cnt;
 
 	dev_info->min_mtu = ETHER_MIN_MTU;
 	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
@@ -528,7 +533,8 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 }
 
 static struct
-xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
+				  struct pkt_rx_queue *rxq)
 {
 	struct xsk_umem_info *umem;
 	const struct rte_memzone *mz;
@@ -549,7 +555,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 	}
 
 	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	umem->buf_ring = rte_ring_create(ring_name,
 					 ETH_AF_XDP_NUM_BUFFERS,
 					 rte_socket_id(),
@@ -565,7 +571,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 					  ETH_AF_XDP_DATA_HEADROOM));
 
 	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	mz = rte_memzone_reserve_aligned(mz_name,
 			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
 			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
@@ -602,7 +608,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	int ret = 0;
 	int reserve_size;
 
-	rxq->umem = xdp_umem_configure(internals);
+	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
 
@@ -612,7 +618,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
-			internals->queue_idx, rxq->umem->umem, &rxq->rx,
+			rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
 			&txq->tx, &cfg);
 	if (ret) {
 		AF_XDP_LOG(ERR, "Failed to create xsk socket.\n");
@@ -635,20 +641,6 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	return ret;
 }
 
-static void
-queue_reset(struct pmd_internals *internals, uint16_t queue_idx)
-{
-	struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx];
-	struct pkt_tx_queue *txq = rxq->pair;
-
-	memset(rxq, 0, sizeof(*rxq));
-	memset(txq, 0, sizeof(*txq));
-	rxq->pair = txq;
-	txq->pair = rxq;
-	rxq->queue_idx = queue_idx;
-	txq->queue_idx = queue_idx;
-}
-
 static int
 eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   uint16_t rx_queue_id,
@@ -663,8 +655,9 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	int ret;
 
 	rxq = &internals->rx_queues[rx_queue_id];
-	queue_reset(internals, rx_queue_id);
 
+	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
+		   rx_queue_id, rxq->xsk_queue_idx);
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
 		RTE_PKTMBUF_HEADROOM;
@@ -695,7 +688,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	return 0;
 
 err:
-	queue_reset(internals, rx_queue_id);
 	return ret;
 }
 
@@ -825,8 +817,8 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
-				int *pmd_zc)
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
+			int *queue_cnt, int *pmd_zc)
 {
 	int ret;
 
@@ -835,11 +827,18 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
 	if (ret < 0)
 		goto free_kvlist;
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IDX_ARG,
-				 &parse_integer_arg, queue_idx);
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
+				 &parse_integer_arg, start_queue);
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
+				 &parse_integer_arg, queue_cnt);
+	if (ret < 0 || *queue_cnt > ETH_AF_XDP_MAX_QUEUE_PAIRS) {
+		ret = -EINVAL;
+		goto free_kvlist;
+	}
+
 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
 				 &parse_integer_arg, pmd_zc);
 	if (ret < 0)
@@ -881,8 +880,8 @@ get_iface_info(const char *if_name,
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
-					int pmd_zc)
+init_internals(struct rte_vdev_device *dev, const char *if_name,
+			int start_queue_idx, int queue_cnt, int pmd_zc)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -895,13 +894,16 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
 	if (internals == NULL)
 		return NULL;
 
-	internals->queue_idx = queue_idx;
+	internals->start_queue_idx = start_queue_idx;
+	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < queue_cnt; i++) {
 		internals->tx_queues[i].pair = &internals->rx_queues[i];
 		internals->rx_queues[i].pair = &internals->tx_queues[i];
+		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
+		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
 	}
 
 	ret = get_iface_info(if_name, &internals->eth_addr,
@@ -934,7 +936,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
 	struct rte_kvargs *kvlist;
 	char if_name[IFNAMSIZ] = {'\0'};
-	int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
+	int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX;
+	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
@@ -964,7 +967,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
-	if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
+	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
+			     &xsk_queue_cnt, &pmd_zc) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -974,7 +978,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
-	eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
+	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
+					xsk_queue_cnt, pmd_zc);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1016,7 +1021,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
 RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
-			      "queue=<int> "
+			      "start_queue=<int> "
+			      "queue_count=<int> "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support
  2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
@ 2019-05-15  8:38 ` Xiaolong Ye
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
  3 siblings, 0 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-15  8:38 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
  Cc: Karlsson Magnus, Topel Bjorn, dev

This patch enables busy-poll support for AF_XDP pmd. With busy-poll, the
kernel driver is executed in process context by calling the poll() syscall.

The main advantage of busy-poll feature is that all processing occurs on a
single core. This eliminates the core-to-core cache transfers that occur
between the application and the softirqd processing on another core.

The drawback of busy-poll is that it will downgrade the max throughput due
to syscall, but from a per-core perspective, the performance is better as
normal mode runs on two cores and busy-poll only runs on a single core.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |  1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 48 ++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 18defcda3..e42065170 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -29,6 +29,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
+*   ``busy_poll_size`` - busy poll batch size (optional, default 0);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 9a4510701..1e46a4ef4 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <netinet/in.h>
+#include <poll.h>
 #include <net/if.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
@@ -72,6 +73,7 @@ struct xsk_umem_info {
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
 	int pmd_zc;
+	int busy_poll;
 };
 
 struct rx_stats {
@@ -114,6 +116,7 @@ struct pmd_internals {
 	int queue_cnt;
 
 	int pmd_zc;
+	int busy_poll_size;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
@@ -126,12 +129,14 @@ struct pmd_internals {
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
+#define ETH_AF_XDP_BUSY_POLL_SIZE_ARG		"busy_poll_size"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
 	NULL
 };
 
@@ -191,6 +196,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
+	struct pollfd pfds[1];
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
 	int pmd_zc = umem->pmd_zc;
@@ -199,6 +205,15 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(rxq->xsk);
+		pfds[0].events = POLLIN;
+
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
@@ -305,12 +320,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	struct pollfd pfds[1];
 	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(txq->pair->xsk);
+		pfds[0].events = POLLOUT;
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+		if (!(pfds[0].revents & POLLOUT))
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
 	pull_umem_cq(umem, nb_pkts);
@@ -615,6 +641,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
 	cfg.libbpf_flags = 0;
+	cfg.busy_poll = internals->busy_poll_size;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
@@ -680,10 +707,14 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	internals->umem = rxq->umem;
 	internals->umem->pmd_zc = internals->pmd_zc;
+	internals->umem->busy_poll = internals->busy_poll_size ? 1 : 0;
 
 	if (internals->umem->pmd_zc)
 		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
 
+	if (internals->umem->busy_poll)
+		AF_XDP_LOG(INFO, "Busy poll enabled.\n");
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -818,7 +849,7 @@ parse_name_arg(const char *key __rte_unused,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc)
+			int *queue_cnt, int *pmd_zc, int *busy_poll_size)
 {
 	int ret;
 
@@ -844,6 +875,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
+				 &parse_integer_arg, busy_poll_size);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -881,7 +917,8 @@ get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc)
+			int start_queue_idx, int queue_cnt, int pmd_zc,
+				int busy_poll_size)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -897,6 +934,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
+	internals->busy_poll_size = busy_poll_size;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < queue_cnt; i++) {
@@ -941,6 +979,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
+	int busy_poll_size = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -968,7 +1007,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt, &pmd_zc, &busy_poll_size) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -979,7 +1018,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc);
+				 xsk_queue_cnt, pmd_zc, busy_poll_size);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1023,6 +1062,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
 			      "queue_count=<int> "
+			      "busy_poll_size=<int> "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd
  2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
                   ` (2 preceding siblings ...)
  2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support Xiaolong Ye
@ 2019-05-30  9:07 ` Xiaolong Ye
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
                     ` (3 more replies)
  3 siblings, 4 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-30  9:07 UTC (permalink / raw)
  Cc: Qi Zhang, Karlsson Magnus, Topel Bjorn, dev, Xiaolong Ye

This patch series mainly includes 3 new features for AF_XDP pmd. They
are separated independent features, the reason I take them in one
patchset is that they have code dependency.

1. zero copy

This patch enables `zero copy` between af_xdp umem and mbuf by using
external mbuf mechanism.

2. multi-queue

With mutli-queue support, one AF_XDP pmd instance can use multi netdev
queues.

3. busy-poll

With busy-poll, all processing occurs on a single core, performance is
better from a per-core perspective.

This patch has dependency on busy-poll support in kernel side and now it
is in
RFC stage [1].

[1] https://www.spinics.net/lists/netdev/msg568337.html

V2 changes:

1. improve mutli-queue support by getting the ethtool channel, so the
   driver is able to get a reason maximum queue number.
2. add a tiny cleanup patch to get rid of unused struct member
3. remove the busy-poll patch as its kernel counterpart changes, will
   update the patch later

Xiaolong Ye (3):
  net/af_xdp: enable zero copy by extbuf
  net/af_xdp: add multi-queue support
  net/af_xdp: remove unused struct member

 doc/guides/nics/af_xdp.rst          |   4 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c | 285 ++++++++++++++++++++--------
 2 files changed, 213 insertions(+), 76 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
@ 2019-05-30  9:07   ` Xiaolong Ye
  2019-05-30 15:31     ` Stephen Hemminger
  2019-06-11 16:16     ` William Tu
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-30  9:07 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
  Cc: Karlsson Magnus, Topel Bjorn, dev

Implement zero copy of af_xdp pmd through mbuf's external memory
mechanism to achieve high performance.

This patch also provides a new parameter "pmd_zero_copy" for user, so they
can choose to enable zero copy of af_xdp pmd or not.

To be clear, "zero copy" here is different from the "zero copy mode" of
AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
application.

Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
Suggested-by: Tummala Sivaprasad <sivaprasad.tummala@intel.com>
Suggested-by: Olivier Matz <olivier.matz@6wind.com>
Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |   1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 104 +++++++++++++++++++++-------
 2 files changed, 79 insertions(+), 26 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 175038153..0bd4239fe 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``queue`` - netdev queue id (optional, default 0);
+*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
 -------------
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 35c72272c..014cd5691 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -70,6 +70,7 @@ struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
+	int pmd_zc;
 };
 
 struct rx_stats {
@@ -109,8 +110,8 @@ struct pmd_internals {
 	int if_index;
 	char if_name[IFNAMSIZ];
 	uint16_t queue_idx;
+	int pmd_zc;
 	struct ether_addr eth_addr;
-	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
 
 	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
@@ -119,10 +120,12 @@ struct pmd_internals {
 
 #define ETH_AF_XDP_IFACE_ARG			"iface"
 #define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
+#define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
 
@@ -166,6 +169,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
 	return 0;
 }
 
+static void
+umem_buf_release_to_fq(void *addr, void *opaque)
+{
+	struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
+	uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
+
+	rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
+}
+
 static uint16_t
 eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -175,6 +187,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
+	int pmd_zc = umem->pmd_zc;
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 	unsigned long dropped = 0;
 	unsigned long rx_bytes = 0;
@@ -197,19 +210,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint64_t addr;
 		uint32_t len;
 		void *pkt;
+		uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
+		struct rte_mbuf_ext_shared_info *shinfo;
 
 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
 		addr = desc->addr;
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
-		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
+		if (pmd_zc) {
+			shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
+					&buf_len, umem_buf_release_to_fq, umem);
+
+			rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
+						  shinfo);
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
+							pkt, len);
+			rte_ring_enqueue(umem->buf_ring, (void *)addr);
+		}
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
 		rte_pktmbuf_data_len(mbufs[i]) = len;
 		rx_bytes += len;
 		bufs[i] = mbufs[i];
-
-		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 	}
 
 	xsk_ring_cons__release(rx, rcvd);
@@ -262,12 +285,21 @@ kick_tx(struct pkt_tx_queue *txq)
 	pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
 }
 
+static inline bool
+in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
+{
+	uint64_t mz_base_addr = umem->mz->addr_64;
+
+	return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
+}
+
 static uint16_t
 eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
@@ -294,16 +326,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
 		mbuf = bufs[i];
-
-		desc->addr = (uint64_t)addrs[i];
 		desc->len = mbuf->pkt_len;
-		pkt = xsk_umem__get_data(umem->mz->addr,
-					 desc->addr);
-		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-			   desc->len);
-		tx_bytes += mbuf->pkt_len;
 
-		rte_pktmbuf_free(mbuf);
+		/*
+		 * We need to make sure the external mbuf address is within
+		 * current port's umem memzone range
+		 */
+		if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
+				in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
+			desc->addr = (uint64_t)mbuf->buf_addr -
+				umem->mz->addr_64;
+			mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
+					(uint64_t)addrs[i]);
+		} else {
+			desc->addr = (uint64_t)addrs[i];
+			pkt = xsk_umem__get_data(umem->mz->addr,
+					desc->addr);
+			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+					desc->len);
+		}
+		tx_bytes += mbuf->pkt_len;
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -313,6 +355,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	txq->stats.tx_pkts += nb_pkts;
 	txq->stats.tx_bytes += tx_bytes;
 
+	for (i = 0; i < nb_pkts; i++)
+		rte_pktmbuf_free(bufs[i]);
+
 	return nb_pkts;
 }
 
@@ -454,18 +499,16 @@ eth_dev_close(struct rte_eth_dev *dev)
 		if (rxq->umem == NULL)
 			break;
 		xsk_socket__delete(rxq->xsk);
+		(void)xsk_umem__delete(rxq->umem->umem);
+		xdp_umem_destroy(rxq->umem);
 	}
 
-	(void)xsk_umem__delete(internals->umem->umem);
-
 	/*
 	 * MAC is not allocated dynamically, setting it to NULL would prevent
 	 * from releasing it in rte_eth_dev_release_port.
 	 */
 	dev->data->mac_addrs = NULL;
 
-	xdp_umem_destroy(internals->umem);
-
 	remove_xdp_program(internals);
 }
 
@@ -639,7 +682,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		goto err;
 	}
 
-	internals->umem = rxq->umem;
+	rxq->umem->pmd_zc = internals->pmd_zc;
 
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
@@ -775,9 +818,8 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist,
-		 char *if_name,
-		 int *queue_idx)
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
+				int *pmd_zc)
 {
 	int ret;
 
@@ -791,6 +833,11 @@ parse_parameters(struct rte_kvargs *kvlist,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
+				 &parse_integer_arg, pmd_zc);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -827,9 +874,8 @@ get_iface_info(const char *if_name,
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev,
-	       const char *if_name,
-	       int queue_idx)
+init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
+					int pmd_zc)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -843,6 +889,7 @@ init_internals(struct rte_vdev_device *dev,
 		return NULL;
 
 	internals->queue_idx = queue_idx;
+	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
@@ -868,6 +915,9 @@ init_internals(struct rte_vdev_device *dev,
 	/* Let rte_eth_dev_close() release the port resources. */
 	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 
+	if (internals->pmd_zc)
+		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
+
 	return eth_dev;
 
 err:
@@ -883,6 +933,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
+	int pmd_zc = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -909,7 +960,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
-	if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) {
+	if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -919,7 +970,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
-	eth_dev = init_internals(dev, if_name, xsk_queue_idx);
+	eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -961,7 +1012,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
 RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
-			      "queue=<int> ");
+			      "queue=<int> "
+			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
 {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
@ 2019-05-30  9:07   ` Xiaolong Ye
  2019-05-30 15:32     ` Stephen Hemminger
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 3/3] net/af_xdp: remove unused struct member Xiaolong Ye
  2019-06-10 16:54   ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Ferruh Yigit
  3 siblings, 1 reply; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-30  9:07 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang, John McNamara, Marko Kovacevic
  Cc: Karlsson Magnus, Topel Bjorn, dev

This patch adds two parameters `start_queue` and `queue_count` to
specify the range of netdev queues used by AF_XDP pmd.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |   3 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c | 194 ++++++++++++++++++++--------
 2 files changed, 141 insertions(+), 56 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 0bd4239fe..18defcda3 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -27,7 +27,8 @@ Options
 The following options can be provided to set up an af_xdp port in DPDK.
 
 *   ``iface`` - name of the Kernel interface to attach to (required);
-*   ``queue`` - netdev queue id (optional, default 0);
+*   ``start_queue`` - starting netdev queue id (optional, default 0);
+*   ``queue_count`` - total netdev queue number (optional, default 1);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 014cd5691..f56aabcae 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -12,6 +12,8 @@
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
 #include <linux/if_link.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
 #include "af_xdp_deps.h"
 #include <bpf/xsk.h>
 
@@ -57,12 +59,12 @@ static int af_xdp_logtype;
 #define ETH_AF_XDP_NUM_BUFFERS		4096
 #define ETH_AF_XDP_DATA_HEADROOM	0
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
-#define ETH_AF_XDP_DFLT_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
+#define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
 
 #define ETH_AF_XDP_RX_BATCH_SIZE	32
 #define ETH_AF_XDP_TX_BATCH_SIZE	32
 
-#define ETH_AF_XDP_MAX_QUEUE_PAIRS     16
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -88,7 +90,7 @@ struct pkt_rx_queue {
 	struct rx_stats stats;
 
 	struct pkt_tx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct tx_stats {
@@ -103,28 +105,34 @@ struct pkt_tx_queue {
 	struct tx_stats stats;
 
 	struct pkt_rx_queue *pair;
-	uint16_t queue_idx;
+	int xsk_queue_idx;
 };
 
 struct pmd_internals {
 	int if_index;
 	char if_name[IFNAMSIZ];
-	uint16_t queue_idx;
+	int start_queue_idx;
+	int queue_cnt;
+	int max_queue_cnt;
+	int combined_queue_cnt;
+
 	int pmd_zc;
 	struct ether_addr eth_addr;
 	struct rte_mempool *mb_pool_share;
 
-	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
-	struct pkt_tx_queue tx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
+	struct pkt_rx_queue *rx_queues;
+	struct pkt_tx_queue *tx_queues;
 };
 
 #define ETH_AF_XDP_IFACE_ARG			"iface"
-#define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
+#define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
+#define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
-	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_START_QUEUE_ARG,
+	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
 	NULL
 };
@@ -394,8 +402,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->if_index = internals->if_index;
 	dev_info->max_mac_addrs = 1;
 	dev_info->max_rx_pktlen = ETH_FRAME_LEN;
-	dev_info->max_rx_queues = 1;
-	dev_info->max_tx_queues = 1;
+	dev_info->max_rx_queues = internals->queue_cnt;
+	dev_info->max_tx_queues = internals->queue_cnt;
 
 	dev_info->min_mtu = ETHER_MIN_MTU;
 	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
@@ -412,21 +420,23 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 	struct pmd_internals *internals = dev->data->dev_private;
 	struct xdp_statistics xdp_stats;
 	struct pkt_rx_queue *rxq;
+	struct pkt_tx_queue *txq;
 	socklen_t optlen;
 	int i, ret;
 
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		optlen = sizeof(struct xdp_statistics);
 		rxq = &internals->rx_queues[i];
-		stats->q_ipackets[i] = internals->rx_queues[i].stats.rx_pkts;
-		stats->q_ibytes[i] = internals->rx_queues[i].stats.rx_bytes;
+		txq = rxq->pair;
+		stats->q_ipackets[i] = rxq->stats.rx_pkts;
+		stats->q_ibytes[i] = rxq->stats.rx_bytes;
 
-		stats->q_opackets[i] = internals->tx_queues[i].stats.tx_pkts;
-		stats->q_obytes[i] = internals->tx_queues[i].stats.tx_bytes;
+		stats->q_opackets[i] = txq->stats.tx_pkts;
+		stats->q_obytes[i] = txq->stats.tx_bytes;
 
 		stats->ipackets += stats->q_ipackets[i];
 		stats->ibytes += stats->q_ibytes[i];
-		stats->imissed += internals->rx_queues[i].stats.rx_dropped;
+		stats->imissed += rxq->stats.rx_dropped;
 		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
 				XDP_STATISTICS, &xdp_stats, &optlen);
 		if (ret != 0) {
@@ -436,7 +446,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 		stats->imissed += xdp_stats.rx_dropped;
 
 		stats->opackets += stats->q_opackets[i];
-		stats->oerrors += internals->tx_queues[i].stats.err_pkts;
+		stats->oerrors += txq->stats.err_pkts;
 		stats->obytes += stats->q_obytes[i];
 	}
 
@@ -449,7 +459,7 @@ eth_stats_reset(struct rte_eth_dev *dev)
 	struct pmd_internals *internals = dev->data->dev_private;
 	int i;
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		memset(&internals->rx_queues[i].stats, 0,
 					sizeof(struct rx_stats));
 		memset(&internals->tx_queues[i].stats, 0,
@@ -494,13 +504,17 @@ eth_dev_close(struct rte_eth_dev *dev)
 	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	for (i = 0; i < internals->queue_cnt; i++) {
 		rxq = &internals->rx_queues[i];
 		if (rxq->umem == NULL)
 			break;
 		xsk_socket__delete(rxq->xsk);
 		(void)xsk_umem__delete(rxq->umem->umem);
 		xdp_umem_destroy(rxq->umem);
+
+		/* free pkt_tx_queue */
+		rte_free(rxq->pair);
+		rte_free(rxq);
 	}
 
 	/*
@@ -525,7 +539,8 @@ eth_link_update(struct rte_eth_dev *dev __rte_unused,
 }
 
 static struct
-xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
+xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
+				  struct pkt_rx_queue *rxq)
 {
 	struct xsk_umem_info *umem;
 	const struct rte_memzone *mz;
@@ -546,7 +561,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 	}
 
 	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	umem->buf_ring = rte_ring_create(ring_name,
 					 ETH_AF_XDP_NUM_BUFFERS,
 					 rte_socket_id(),
@@ -562,7 +577,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals)
 					  ETH_AF_XDP_DATA_HEADROOM));
 
 	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
-		       internals->if_name, internals->queue_idx);
+		       internals->if_name, rxq->xsk_queue_idx);
 	mz = rte_memzone_reserve_aligned(mz_name,
 			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
 			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
@@ -599,7 +614,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	int ret = 0;
 	int reserve_size;
 
-	rxq->umem = xdp_umem_configure(internals);
+	rxq->umem = xdp_umem_configure(internals, rxq);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
 
@@ -609,7 +624,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
-			internals->queue_idx, rxq->umem->umem, &rxq->rx,
+			rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
 			&txq->tx, &cfg);
 	if (ret) {
 		AF_XDP_LOG(ERR, "Failed to create xsk socket.\n");
@@ -632,20 +647,6 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	return ret;
 }
 
-static void
-queue_reset(struct pmd_internals *internals, uint16_t queue_idx)
-{
-	struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx];
-	struct pkt_tx_queue *txq = rxq->pair;
-
-	memset(rxq, 0, sizeof(*rxq));
-	memset(txq, 0, sizeof(*txq));
-	rxq->pair = txq;
-	txq->pair = rxq;
-	rxq->queue_idx = queue_idx;
-	txq->queue_idx = queue_idx;
-}
-
 static int
 eth_rx_queue_setup(struct rte_eth_dev *dev,
 		   uint16_t rx_queue_id,
@@ -660,8 +661,9 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	int ret;
 
 	rxq = &internals->rx_queues[rx_queue_id];
-	queue_reset(internals, rx_queue_id);
 
+	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
+		   rx_queue_id, rxq->xsk_queue_idx);
 	/* Now get the space available for data in the mbuf */
 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
 		RTE_PKTMBUF_HEADROOM;
@@ -688,7 +690,6 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	return 0;
 
 err:
-	queue_reset(internals, rx_queue_id);
 	return ret;
 }
 
@@ -818,8 +819,45 @@ parse_name_arg(const char *key __rte_unused,
 }
 
 static int
-parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
-				int *pmd_zc)
+xdp_get_channels_info(const char *if_name, int *max_queues,
+				int *combined_queues)
+{
+	struct ethtool_channels channels;
+	struct ifreq ifr;
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	channels.cmd = ETHTOOL_GCHANNELS;
+	ifr.ifr_data = (void *)&channels;
+	strncpy(ifr.ifr_name, if_name, IFNAMSIZ);
+	ret = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (ret && errno != EOPNOTSUPP) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (channels.max_combined == 0 || errno == EOPNOTSUPP) {
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		*max_queues = 1;
+		*combined_queues = 1;
+	} else {
+		*max_queues = channels.max_combined;
+		*combined_queues = channels.combined_count;
+	}
+
+ out:
+	close(fd);
+	return ret;
+}
+
+static int
+parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
+			int *queue_cnt, int *pmd_zc)
 {
 	int ret;
 
@@ -828,11 +866,18 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
 	if (ret < 0)
 		goto free_kvlist;
 
-	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IDX_ARG,
-				 &parse_integer_arg, queue_idx);
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
+				 &parse_integer_arg, start_queue);
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
+				 &parse_integer_arg, queue_cnt);
+	if (ret < 0 || *queue_cnt <= 0) {
+		ret = -EINVAL;
+		goto free_kvlist;
+	}
+
 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
 				 &parse_integer_arg, pmd_zc);
 	if (ret < 0)
@@ -874,8 +919,8 @@ get_iface_info(const char *if_name,
 }
 
 static struct rte_eth_dev *
-init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
-					int pmd_zc)
+init_internals(struct rte_vdev_device *dev, const char *if_name,
+			int start_queue_idx, int queue_cnt, int pmd_zc)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -888,23 +933,54 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
 	if (internals == NULL)
 		return NULL;
 
-	internals->queue_idx = queue_idx;
+	internals->start_queue_idx = start_queue_idx;
+	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
-	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
+				  &internals->combined_queue_cnt)) {
+		AF_XDP_LOG(ERR, "Failed to get channel info of interface: %s\n",
+				if_name);
+		goto err_free_internals;
+	}
+
+	if (queue_cnt > internals->combined_queue_cnt) {
+		AF_XDP_LOG(ERR, "Specified queue count %d is larger than combined queue count %d.\n",
+				queue_cnt, internals->combined_queue_cnt);
+		goto err_free_internals;
+	}
+
+	internals->rx_queues = rte_zmalloc_socket(NULL,
+					sizeof(struct pkt_rx_queue) * queue_cnt,
+					0, numa_node);
+	if (internals->rx_queues == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for rx queues.\n");
+		goto err_free_internals;
+	}
+
+	internals->tx_queues = rte_zmalloc_socket(NULL,
+					sizeof(struct pkt_tx_queue) * queue_cnt,
+					0, numa_node);
+	if (internals->tx_queues == NULL) {
+		AF_XDP_LOG(ERR, "Failed to allocate memory for tx queues.\n");
+		goto err_free_rx;
+	}
+	for (i = 0; i < queue_cnt; i++) {
 		internals->tx_queues[i].pair = &internals->rx_queues[i];
 		internals->rx_queues[i].pair = &internals->tx_queues[i];
+		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
+		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
 	}
 
 	ret = get_iface_info(if_name, &internals->eth_addr,
 			     &internals->if_index);
 	if (ret)
-		goto err;
+		goto err_free_tx;
 
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (eth_dev == NULL)
-		goto err;
+		goto err_free_tx;
 
 	eth_dev->data->dev_private = internals;
 	eth_dev->data->dev_link = pmd_link;
@@ -920,7 +996,11 @@ init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
 
 	return eth_dev;
 
-err:
+err_free_tx:
+	rte_free(internals->tx_queues);
+err_free_rx:
+	rte_free(internals->rx_queues);
+err_free_internals:
 	rte_free(internals);
 	return NULL;
 }
@@ -930,7 +1010,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 {
 	struct rte_kvargs *kvlist;
 	char if_name[IFNAMSIZ] = {'\0'};
-	int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
+	int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX;
+	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
@@ -960,7 +1041,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	if (dev->device.numa_node == SOCKET_ID_ANY)
 		dev->device.numa_node = rte_socket_id();
 
-	if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
+	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
+			     &xsk_queue_cnt, &pmd_zc) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -970,7 +1052,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
-	eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
+	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
+					xsk_queue_cnt, pmd_zc);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1012,7 +1095,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
 RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
 RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
-			      "queue=<int> "
+			      "start_queue=<int> "
+			      "queue_count=<int> "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] net/af_xdp: remove unused struct member
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
@ 2019-05-30  9:07   ` Xiaolong Ye
  2019-06-10 16:54   ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Ferruh Yigit
  3 siblings, 0 replies; 16+ messages in thread
From: Xiaolong Ye @ 2019-05-30  9:07 UTC (permalink / raw)
  To: Xiaolong Ye, Qi Zhang; +Cc: Karlsson Magnus, Topel Bjorn, dev, stable

Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD")
Cc: stable@dpdk.org

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index f56aabcae..fc25d245b 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -118,7 +118,6 @@ struct pmd_internals {
 
 	int pmd_zc;
 	struct ether_addr eth_addr;
-	struct rte_mempool *mb_pool_share;
 
 	struct pkt_rx_queue *rx_queues;
 	struct pkt_tx_queue *tx_queues;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
@ 2019-05-30 15:31     ` Stephen Hemminger
  2019-05-31  1:49       ` Ye Xiaolong
  2019-06-11 16:16     ` William Tu
  1 sibling, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2019-05-30 15:31 UTC (permalink / raw)
  To: Xiaolong Ye
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

On Thu, 30 May 2019 17:07:05 +0800
Xiaolong Ye <xiaolong.ye@intel.com> wrote:

> Implement zero copy of af_xdp pmd through mbuf's external memory
> mechanism to achieve high performance.
> 
> This patch also provides a new parameter "pmd_zero_copy" for user, so they
> can choose to enable zero copy of af_xdp pmd or not.
> 
> To be clear, "zero copy" here is different from the "zero copy mode" of
> AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
> application.
> 
> Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
> Suggested-by: Tummala Sivaprasad <sivaprasad.tummala@intel.com>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>

Why is this a parameter? Can it just be auto detected.
Remember configuration is evil, it hurts usability, code coverage
and increases complexity.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
@ 2019-05-30 15:32     ` Stephen Hemminger
  2019-05-31  1:53       ` Ye Xiaolong
  0 siblings, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2019-05-30 15:32 UTC (permalink / raw)
  To: Xiaolong Ye
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

On Thu, 30 May 2019 17:07:06 +0800
Xiaolong Ye <xiaolong.ye@intel.com> wrote:

> This patch adds two parameters `start_queue` and `queue_count` to
> specify the range of netdev queues used by AF_XDP pmd.
> 
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>

Why does this have to be a config option, we already have max queues
and number of queues in DPDK configuration.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-05-30 15:31     ` Stephen Hemminger
@ 2019-05-31  1:49       ` Ye Xiaolong
  0 siblings, 0 replies; 16+ messages in thread
From: Ye Xiaolong @ 2019-05-31  1:49 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, Ananyev Konstantin, dev

On 05/30, Stephen Hemminger wrote:
>On Thu, 30 May 2019 17:07:05 +0800
>Xiaolong Ye <xiaolong.ye@intel.com> wrote:
>
>> Implement zero copy of af_xdp pmd through mbuf's external memory
>> mechanism to achieve high performance.
>> 
>> This patch also provides a new parameter "pmd_zero_copy" for user, so they
>> can choose to enable zero copy of af_xdp pmd or not.
>> 
>> To be clear, "zero copy" here is different from the "zero copy mode" of
>> AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
>> application.
>> 
>> Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
>> Suggested-by: Tummala Sivaprasad <sivaprasad.tummala@intel.com>
>> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
>> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
>
>Why is this a parameter? Can it just be auto detected.

In case there might be situations when some users would perfer a slower copy
mode, we can just let user decide.

Thanks,
Xiaolong

>Remember configuration is evil, it hurts usability, code coverage
>and increases complexity.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support
  2019-05-30 15:32     ` Stephen Hemminger
@ 2019-05-31  1:53       ` Ye Xiaolong
  0 siblings, 0 replies; 16+ messages in thread
From: Ye Xiaolong @ 2019-05-31  1:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

On 05/30, Stephen Hemminger wrote:
>On Thu, 30 May 2019 17:07:06 +0800
>Xiaolong Ye <xiaolong.ye@intel.com> wrote:
>
>> This patch adds two parameters `start_queue` and `queue_count` to
>> specify the range of netdev queues used by AF_XDP pmd.
>> 
>> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
>
>Why does this have to be a config option, we already have max queues
>and number of queues in DPDK configuration.

Here multi-queue refers to nic ethdev queue, AF_XDP now has 1:1 mapping between
af_xdp socket and netdev queue, this patch is to support one vdev with multi 
af_xdp sockets (netdev queues).

Thanks,
Xiaolong

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd
  2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
                     ` (2 preceding siblings ...)
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 3/3] net/af_xdp: remove unused struct member Xiaolong Ye
@ 2019-06-10 16:54   ` Ferruh Yigit
  3 siblings, 0 replies; 16+ messages in thread
From: Ferruh Yigit @ 2019-06-10 16:54 UTC (permalink / raw)
  To: Xiaolong Ye; +Cc: Qi Zhang, Karlsson Magnus, Topel Bjorn, dev

On 5/30/2019 10:07 AM, Xiaolong Ye wrote:
> This patch series mainly includes 3 new features for AF_XDP pmd. They
> are separated independent features, the reason I take them in one
> patchset is that they have code dependency.
> 
> 1. zero copy
> 
> This patch enables `zero copy` between af_xdp umem and mbuf by using
> external mbuf mechanism.
> 
> 2. multi-queue
> 
> With mutli-queue support, one AF_XDP pmd instance can use multi netdev
> queues.
> 
> 3. busy-poll
> 
> With busy-poll, all processing occurs on a single core, performance is
> better from a per-core perspective.
> 
> This patch has dependency on busy-poll support in kernel side and now it
> is in
> RFC stage [1].
> 
> [1] https://www.spinics.net/lists/netdev/msg568337.html
> 
> V2 changes:
> 
> 1. improve mutli-queue support by getting the ethtool channel, so the
>    driver is able to get a reason maximum queue number.
> 2. add a tiny cleanup patch to get rid of unused struct member
> 3. remove the busy-poll patch as its kernel counterpart changes, will
>    update the patch later
> 
> Xiaolong Ye (3):
>   net/af_xdp: enable zero copy by extbuf
>   net/af_xdp: add multi-queue support
>   net/af_xdp: remove unused struct member

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
  2019-05-30 15:31     ` Stephen Hemminger
@ 2019-06-11 16:16     ` William Tu
  2019-06-12 10:03       ` Ye Xiaolong
  1 sibling, 1 reply; 16+ messages in thread
From: William Tu @ 2019-06-11 16:16 UTC (permalink / raw)
  To: Xiaolong Ye
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

On Thu, May 30, 2019 at 2:16 AM Xiaolong Ye <xiaolong.ye@intel.com> wrote:
>
> Implement zero copy of af_xdp pmd through mbuf's external memory
> mechanism to achieve high performance.
>
> This patch also provides a new parameter "pmd_zero_copy" for user, so they
> can choose to enable zero copy of af_xdp pmd or not.
>
> To be clear, "zero copy" here is different from the "zero copy mode" of
> AF_XDP, it is about zero copy between af_xdp umem and mbuf used in dpdk
> application.
>
> Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
> Suggested-by: Tummala Sivaprasad <sivaprasad.tummala@intel.com>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
> ---
>  doc/guides/nics/af_xdp.rst          |   1 +
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 104 +++++++++++++++++++++-------
>  2 files changed, 79 insertions(+), 26 deletions(-)
>
> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> index 175038153..0bd4239fe 100644
> --- a/doc/guides/nics/af_xdp.rst
> +++ b/doc/guides/nics/af_xdp.rst
> @@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
>
>  *   ``iface`` - name of the Kernel interface to attach to (required);
>  *   ``queue`` - netdev queue id (optional, default 0);
> +*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
>
>  Prerequisites
>  -------------
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 35c72272c..014cd5691 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -70,6 +70,7 @@ struct xsk_umem_info {
>         struct xsk_umem *umem;
>         struct rte_ring *buf_ring;
>         const struct rte_memzone *mz;
> +       int pmd_zc;
>  };
>
>  struct rx_stats {
> @@ -109,8 +110,8 @@ struct pmd_internals {
>         int if_index;
>         char if_name[IFNAMSIZ];
>         uint16_t queue_idx;
> +       int pmd_zc;
>         struct ether_addr eth_addr;
> -       struct xsk_umem_info *umem;
>         struct rte_mempool *mb_pool_share;
>
>         struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
> @@ -119,10 +120,12 @@ struct pmd_internals {
>
>  #define ETH_AF_XDP_IFACE_ARG                   "iface"
>  #define ETH_AF_XDP_QUEUE_IDX_ARG               "queue"
> +#define ETH_AF_XDP_PMD_ZC_ARG                  "pmd_zero_copy"
>
>  static const char * const valid_arguments[] = {
>         ETH_AF_XDP_IFACE_ARG,
>         ETH_AF_XDP_QUEUE_IDX_ARG,
> +       ETH_AF_XDP_PMD_ZC_ARG,
>         NULL
>  };
>
> @@ -166,6 +169,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
>         return 0;
>  }
>
> +static void
> +umem_buf_release_to_fq(void *addr, void *opaque)
> +{
> +       struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
> +       uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
> +
> +       rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
> +}
> +
>  static uint16_t
>  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
> @@ -175,6 +187,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         struct xsk_ring_prod *fq = &umem->fq;
>         uint32_t idx_rx = 0;
>         uint32_t free_thresh = fq->size >> 1;
> +       int pmd_zc = umem->pmd_zc;
>         struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
>         unsigned long dropped = 0;
>         unsigned long rx_bytes = 0;
> @@ -197,19 +210,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>                 uint64_t addr;
>                 uint32_t len;
>                 void *pkt;
> +               uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
> +               struct rte_mbuf_ext_shared_info *shinfo;
>
>                 desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
>                 addr = desc->addr;
>                 len = desc->len;
>                 pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
>
> -               rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
> +               if (pmd_zc) {
> +                       shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
> +                                       &buf_len, umem_buf_release_to_fq, umem);
> +
> +                       rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
> +                                                 shinfo);
> +               } else {
> +                       rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
> +                                                       pkt, len);
> +                       rte_ring_enqueue(umem->buf_ring, (void *)addr);
> +               }
>                 rte_pktmbuf_pkt_len(mbufs[i]) = len;
>                 rte_pktmbuf_data_len(mbufs[i]) = len;
>                 rx_bytes += len;
>                 bufs[i] = mbufs[i];
> -
> -               rte_ring_enqueue(umem->buf_ring, (void *)addr);
>         }
>
>         xsk_ring_cons__release(rx, rcvd);
> @@ -262,12 +285,21 @@ kick_tx(struct pkt_tx_queue *txq)
>         pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
>  }
>
> +static inline bool
> +in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
> +{
> +       uint64_t mz_base_addr = umem->mz->addr_64;
> +
> +       return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
> +}
> +
>  static uint16_t
>  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
>         struct pkt_tx_queue *txq = queue;
>         struct xsk_umem_info *umem = txq->pair->umem;
>         struct rte_mbuf *mbuf;
> +       int pmd_zc = umem->pmd_zc;
>         void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
>         unsigned long tx_bytes = 0;
>         int i;
> @@ -294,16 +326,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>
>                 desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>                 mbuf = bufs[i];
> -
> -               desc->addr = (uint64_t)addrs[i];
>                 desc->len = mbuf->pkt_len;
> -               pkt = xsk_umem__get_data(umem->mz->addr,
> -                                        desc->addr);
> -               rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -                          desc->len);
> -               tx_bytes += mbuf->pkt_len;
>
> -               rte_pktmbuf_free(mbuf);
> +               /*
> +                * We need to make sure the external mbuf address is within
> +                * current port's umem memzone range
> +                */
> +               if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
> +                               in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
> +                       desc->addr = (uint64_t)mbuf->buf_addr -
> +                               umem->mz->addr_64;
> +                       mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
> +                                       (uint64_t)addrs[i]);
> +               } else {
> +                       desc->addr = (uint64_t)addrs[i];
> +                       pkt = xsk_umem__get_data(umem->mz->addr,
> +                                       desc->addr);
> +                       rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> +                                       desc->len);
> +               }
> +               tx_bytes += mbuf->pkt_len;
>         }
>
>         xsk_ring_prod__submit(&txq->tx, nb_pkts);
> @@ -313,6 +355,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         txq->stats.tx_pkts += nb_pkts;
>         txq->stats.tx_bytes += tx_bytes;
>
> +       for (i = 0; i < nb_pkts; i++)
> +               rte_pktmbuf_free(bufs[i]);
> +

Is it ok to free the mbuf here?
If the AF_XDP is running pmd_zc=true, the packet mbuf is still in the tx
ring and might not be sent out yet.

Regards,
William


>         return nb_pkts;
>  }
>
> @@ -454,18 +499,16 @@ eth_dev_close(struct rte_eth_dev *dev)
>                 if (rxq->umem == NULL)
>                         break;
>                 xsk_socket__delete(rxq->xsk);
> +               (void)xsk_umem__delete(rxq->umem->umem);
> +               xdp_umem_destroy(rxq->umem);
>         }
>
> -       (void)xsk_umem__delete(internals->umem->umem);
> -
>         /*
>          * MAC is not allocated dynamically, setting it to NULL would prevent
>          * from releasing it in rte_eth_dev_release_port.
>          */
>         dev->data->mac_addrs = NULL;
>
> -       xdp_umem_destroy(internals->umem);
> -
>         remove_xdp_program(internals);
>  }
>
> @@ -639,7 +682,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>                 goto err;
>         }
>
> -       internals->umem = rxq->umem;
> +       rxq->umem->pmd_zc = internals->pmd_zc;
>
>         dev->data->rx_queues[rx_queue_id] = rxq;
>         return 0;
> @@ -775,9 +818,8 @@ parse_name_arg(const char *key __rte_unused,
>  }
>
>  static int
> -parse_parameters(struct rte_kvargs *kvlist,
> -                char *if_name,
> -                int *queue_idx)
> +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
> +                               int *pmd_zc)
>  {
>         int ret;
>
> @@ -791,6 +833,11 @@ parse_parameters(struct rte_kvargs *kvlist,
>         if (ret < 0)
>                 goto free_kvlist;
>
> +       ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
> +                                &parse_integer_arg, pmd_zc);
> +       if (ret < 0)
> +               goto free_kvlist;
> +
>  free_kvlist:
>         rte_kvargs_free(kvlist);
>         return ret;
> @@ -827,9 +874,8 @@ get_iface_info(const char *if_name,
>  }
>
>  static struct rte_eth_dev *
> -init_internals(struct rte_vdev_device *dev,
> -              const char *if_name,
> -              int queue_idx)
> +init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
> +                                       int pmd_zc)
>  {
>         const char *name = rte_vdev_device_name(dev);
>         const unsigned int numa_node = dev->device.numa_node;
> @@ -843,6 +889,7 @@ init_internals(struct rte_vdev_device *dev,
>                 return NULL;
>
>         internals->queue_idx = queue_idx;
> +       internals->pmd_zc = pmd_zc;
>         strlcpy(internals->if_name, if_name, IFNAMSIZ);
>
>         for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
> @@ -868,6 +915,9 @@ init_internals(struct rte_vdev_device *dev,
>         /* Let rte_eth_dev_close() release the port resources. */
>         eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
>
> +       if (internals->pmd_zc)
> +               AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
> +
>         return eth_dev;
>
>  err:
> @@ -883,6 +933,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>         int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
>         struct rte_eth_dev *eth_dev = NULL;
>         const char *name;
> +       int pmd_zc = 0;
>
>         AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
>                 rte_vdev_device_name(dev));
> @@ -909,7 +960,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>         if (dev->device.numa_node == SOCKET_ID_ANY)
>                 dev->device.numa_node = rte_socket_id();
>
> -       if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) {
> +       if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
>                 AF_XDP_LOG(ERR, "Invalid kvargs value\n");
>                 return -EINVAL;
>         }
> @@ -919,7 +970,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>                 return -EINVAL;
>         }
>
> -       eth_dev = init_internals(dev, if_name, xsk_queue_idx);
> +       eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
>         if (eth_dev == NULL) {
>                 AF_XDP_LOG(ERR, "Failed to init internals\n");
>                 return -1;
> @@ -961,7 +1012,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
>  RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
>  RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
>                               "iface=<string> "
> -                             "queue=<int> ");
> +                             "queue=<int> "
> +                             "pmd_zero_copy=<0|1>");
>
>  RTE_INIT(af_xdp_init_log)
>  {
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-06-11 16:16     ` William Tu
@ 2019-06-12 10:03       ` Ye Xiaolong
  2019-06-13  0:32         ` William Tu
  0 siblings, 1 reply; 16+ messages in thread
From: Ye Xiaolong @ 2019-06-12 10:03 UTC (permalink / raw)
  To: William Tu
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

Hi, 

On 06/11, William Tu wrote:
[snip]
>> @@ -294,16 +326,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>>
>>                 desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>>                 mbuf = bufs[i];
>> -
>> -               desc->addr = (uint64_t)addrs[i];
>>                 desc->len = mbuf->pkt_len;
>> -               pkt = xsk_umem__get_data(umem->mz->addr,
>> -                                        desc->addr);
>> -               rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
>> -                          desc->len);
>> -               tx_bytes += mbuf->pkt_len;
>>
>> -               rte_pktmbuf_free(mbuf);
>> +               /*
>> +                * We need to make sure the external mbuf address is within
>> +                * current port's umem memzone range
>> +                */
>> +               if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
>> +                               in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
>> +                       desc->addr = (uint64_t)mbuf->buf_addr -
>> +                               umem->mz->addr_64;
>> +                       mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
>> +                                       (uint64_t)addrs[i]);
>> +               } else {
>> +                       desc->addr = (uint64_t)addrs[i];
>> +                       pkt = xsk_umem__get_data(umem->mz->addr,
>> +                                       desc->addr);
>> +                       rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
>> +                                       desc->len);
>> +               }
>> +               tx_bytes += mbuf->pkt_len;
>>         }
>>
>>         xsk_ring_prod__submit(&txq->tx, nb_pkts);
>> @@ -313,6 +355,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>>         txq->stats.tx_pkts += nb_pkts;
>>         txq->stats.tx_bytes += tx_bytes;
>>
>> +       for (i = 0; i < nb_pkts; i++)
>> +               rte_pktmbuf_free(bufs[i]);
>> +
>
>Is it ok to free the mbuf here?
>If the AF_XDP is running pmd_zc=true, the packet mbuf is still in the tx
>ring and might not be sent out yet.

For pmd_zc=ture case, here mbuf->buf_addr has been exchanged to available addr
dequeued from umem->buf_ring, rte_pktmbuf_free would just call the free callback
umem_buf_release_to_fq to enqueue the addr to the buf_ring.

Thanks,
Xiaolong

>
>Regards,
>William
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
  2019-06-12 10:03       ` Ye Xiaolong
@ 2019-06-13  0:32         ` William Tu
  0 siblings, 0 replies; 16+ messages in thread
From: William Tu @ 2019-06-13  0:32 UTC (permalink / raw)
  To: Ye Xiaolong
  Cc: Qi Zhang, John McNamara, Marko Kovacevic, Karlsson Magnus,
	Topel Bjorn, dev

On Tue, Jun 11, 2019 at 8:21 PM Ye Xiaolong <xiaolong.ye@intel.com> wrote:
>
> Hi,
>
> On 06/11, William Tu wrote:
> [snip]
> >> @@ -294,16 +326,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> >>
> >>                 desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
> >>                 mbuf = bufs[i];
> >> -
> >> -               desc->addr = (uint64_t)addrs[i];
> >>                 desc->len = mbuf->pkt_len;
> >> -               pkt = xsk_umem__get_data(umem->mz->addr,
> >> -                                        desc->addr);
> >> -               rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> >> -                          desc->len);
> >> -               tx_bytes += mbuf->pkt_len;
> >>
> >> -               rte_pktmbuf_free(mbuf);
> >> +               /*
> >> +                * We need to make sure the external mbuf address is within
> >> +                * current port's umem memzone range
> >> +                */
> >> +               if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
> >> +                               in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
> >> +                       desc->addr = (uint64_t)mbuf->buf_addr -
> >> +                               umem->mz->addr_64;
> >> +                       mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
> >> +                                       (uint64_t)addrs[i]);
> >> +               } else {
> >> +                       desc->addr = (uint64_t)addrs[i];
> >> +                       pkt = xsk_umem__get_data(umem->mz->addr,
> >> +                                       desc->addr);
> >> +                       rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> >> +                                       desc->len);
> >> +               }
> >> +               tx_bytes += mbuf->pkt_len;
> >>         }
> >>
> >>         xsk_ring_prod__submit(&txq->tx, nb_pkts);
> >> @@ -313,6 +355,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> >>         txq->stats.tx_pkts += nb_pkts;
> >>         txq->stats.tx_bytes += tx_bytes;
> >>
> >> +       for (i = 0; i < nb_pkts; i++)
> >> +               rte_pktmbuf_free(bufs[i]);
> >> +
> >
> >Is it ok to free the mbuf here?
> >If the AF_XDP is running pmd_zc=true, the packet mbuf is still in the tx
> >ring and might not be sent out yet.
>
> For pmd_zc=ture case, here mbuf->buf_addr has been exchanged to available addr
> dequeued from umem->buf_ring, rte_pktmbuf_free would just call the free callback
> umem_buf_release_to_fq to enqueue the addr to the buf_ring.
>

I see, thanks for the explanation.
William

> Thanks,
> Xiaolong
>
> >
> >Regards,
> >William
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, back to index

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support Xiaolong Ye
2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
2019-05-30 15:31     ` Stephen Hemminger
2019-05-31  1:49       ` Ye Xiaolong
2019-06-11 16:16     ` William Tu
2019-06-12 10:03       ` Ye Xiaolong
2019-06-13  0:32         ` William Tu
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
2019-05-30 15:32     ` Stephen Hemminger
2019-05-31  1:53       ` Ye Xiaolong
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 3/3] net/af_xdp: remove unused struct member Xiaolong Ye
2019-06-10 16:54   ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Ferruh Yigit

DPDK-dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dpdk-dev/0 dpdk-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dpdk-dev dpdk-dev/ https://lore.kernel.org/dpdk-dev \
		dev@dpdk.org dpdk-dev@archiver.kernel.org
	public-inbox-index dpdk-dev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox