DPDK-dev Archive on lore.kernel.org
 help / color / Atom feed
From: Xiaolong Ye <xiaolong.ye@intel.com>
To: Xiaolong Ye <xiaolong.ye@intel.com>,
	Qi Zhang <qi.z.zhang@intel.com>,
	John McNamara <john.mcnamara@intel.com>,
	Marko Kovacevic <marko.kovacevic@intel.com>
Cc: Karlsson Magnus <magnus.karlsson@intel.com>,
	Topel Bjorn <bjorn.topel@intel.com>,
	dev@dpdk.org
Subject: [dpdk-dev] [PATCH v1 3/3] net/af_xdp: add busy poll support
Date: Wed, 15 May 2019 16:38:42 +0800
Message-ID: <20190515083842.15116-4-xiaolong.ye@intel.com> (raw)
In-Reply-To: <20190515083842.15116-1-xiaolong.ye@intel.com>

This patch enables busy-poll support for the AF_XDP PMD. With busy-poll,
the kernel driver is executed in process context when the application
calls the poll() syscall.

The main advantage of the busy-poll feature is that all processing occurs
on a single core. This eliminates the core-to-core cache transfers that
occur between the application and the softirqd processing on another core.

The drawback of busy-poll is that it lowers the maximum throughput because
of the added syscall overhead. From a per-core perspective, however, the
performance is better, since normal mode runs on two cores while busy-poll
runs on a single core.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 doc/guides/nics/af_xdp.rst          |  1 +
 drivers/net/af_xdp/rte_eth_af_xdp.c | 48 ++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 18defcda3..e42065170 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -29,6 +29,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 *   ``iface`` - name of the Kernel interface to attach to (required);
 *   ``start_queue`` - starting netdev queue id (optional, default 0);
 *   ``queue_count`` - total netdev queue number (optional, default 1);
+*   ``busy_poll_size`` - busy poll batch size (optional, default 0);
 *   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
 
 Prerequisites
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 9a4510701..1e46a4ef4 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <netinet/in.h>
+#include <poll.h>
 #include <net/if.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
@@ -72,6 +73,7 @@ struct xsk_umem_info {
 	struct rte_ring *buf_ring;
 	const struct rte_memzone *mz;
 	int pmd_zc;
+	int busy_poll;
 };
 
 struct rx_stats {
@@ -114,6 +116,7 @@ struct pmd_internals {
 	int queue_cnt;
 
 	int pmd_zc;
+	int busy_poll_size;
 	struct ether_addr eth_addr;
 	struct xsk_umem_info *umem;
 	struct rte_mempool *mb_pool_share;
@@ -126,12 +129,14 @@ struct pmd_internals {
 #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_PMD_ZC_ARG			"pmd_zero_copy"
+#define ETH_AF_XDP_BUSY_POLL_SIZE_ARG		"busy_poll_size"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
 	ETH_AF_XDP_START_QUEUE_ARG,
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_PMD_ZC_ARG,
+	ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
 	NULL
 };
 
@@ -191,6 +196,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_ring_cons *rx = &rxq->rx;
 	struct xsk_umem_info *umem = rxq->umem;
 	struct xsk_ring_prod *fq = &umem->fq;
+	struct pollfd pfds[1];
 	uint32_t idx_rx = 0;
 	uint32_t free_thresh = fq->size >> 1;
 	int pmd_zc = umem->pmd_zc;
@@ -199,6 +205,15 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned long rx_bytes = 0;
 	int rcvd, i;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(rxq->xsk);
+		pfds[0].events = POLLIN;
+
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
 
 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
@@ -305,12 +320,23 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
+	struct pollfd pfds[1];
 	int pmd_zc = umem->pmd_zc;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
 	int i;
 	uint32_t idx_tx;
 
+	if (umem->busy_poll) {
+		memset(pfds, 0, sizeof(pfds));
+		pfds[0].fd = xsk_socket__fd(txq->pair->xsk);
+		pfds[0].events = POLLOUT;
+		if (poll(pfds, 1, 0) <= 0)
+			return 0;
+		if (!(pfds[0].revents & POLLOUT))
+			return 0;
+	}
+
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
 	pull_umem_cq(umem, nb_pkts);
@@ -615,6 +641,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	cfg.rx_size = ring_size;
 	cfg.tx_size = ring_size;
 	cfg.libbpf_flags = 0;
+	cfg.busy_poll = internals->busy_poll_size;
 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	cfg.bind_flags = 0;
 	ret = xsk_socket__create(&rxq->xsk, internals->if_name,
@@ -680,10 +707,14 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	internals->umem = rxq->umem;
 	internals->umem->pmd_zc = internals->pmd_zc;
+	internals->umem->busy_poll = internals->busy_poll_size ? 1 : 0;
 
 	if (internals->umem->pmd_zc)
 		AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
 
+	if (internals->umem->busy_poll)
+		AF_XDP_LOG(INFO, "Busy poll enabled.\n");
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
@@ -818,7 +849,7 @@ parse_name_arg(const char *key __rte_unused,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-			int *queue_cnt, int *pmd_zc)
+			int *queue_cnt, int *pmd_zc, int *busy_poll_size)
 {
 	int ret;
 
@@ -844,6 +875,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUSY_POLL_SIZE_ARG,
+				 &parse_integer_arg, busy_poll_size);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -881,7 +917,8 @@ get_iface_info(const char *if_name,
 
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
-			int start_queue_idx, int queue_cnt, int pmd_zc)
+			int start_queue_idx, int queue_cnt, int pmd_zc,
+				int busy_poll_size)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -897,6 +934,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 	internals->start_queue_idx = start_queue_idx;
 	internals->queue_cnt = queue_cnt;
 	internals->pmd_zc = pmd_zc;
+	internals->busy_poll_size = busy_poll_size;
 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
 
 	for (i = 0; i < queue_cnt; i++) {
@@ -941,6 +979,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 	int pmd_zc = 0;
+	int busy_poll_size = 0;
 
 	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
 		rte_vdev_device_name(dev));
@@ -968,7 +1007,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &pmd_zc) < 0) {
+			     &xsk_queue_cnt, &pmd_zc, &busy_poll_size) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -979,7 +1018,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	}
 
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-					xsk_queue_cnt, pmd_zc);
+				 xsk_queue_cnt, pmd_zc, busy_poll_size);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1023,6 +1062,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "iface=<string> "
 			      "start_queue=<int> "
 			      "queue_count=<int> "
+			      "busy_poll_size=<int> "
 			      "pmd_zero_copy=<0|1>");
 
 RTE_INIT(af_xdp_init_log)
-- 
2.17.1


  parent reply index

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-15  8:38 [dpdk-dev] [PATCH v1 0/3] add more features for AF_XDP pmd Xiaolong Ye
2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
2019-05-15  8:38 ` [dpdk-dev] [PATCH v1 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
2019-05-15  8:38 ` Xiaolong Ye [this message]
2019-05-30  9:07 ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Xiaolong Ye
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf Xiaolong Ye
2019-05-30 15:31     ` Stephen Hemminger
2019-05-31  1:49       ` Ye Xiaolong
2019-06-11 16:16     ` William Tu
2019-06-12 10:03       ` Ye Xiaolong
2019-06-13  0:32         ` William Tu
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 2/3] net/af_xdp: add multi-queue support Xiaolong Ye
2019-05-30 15:32     ` Stephen Hemminger
2019-05-31  1:53       ` Ye Xiaolong
2019-05-30  9:07   ` [dpdk-dev] [PATCH v2 3/3] net/af_xdp: remove unused struct member Xiaolong Ye
2019-06-10 16:54   ` [dpdk-dev] [PATCH v2 0/3] add more features for AF_XDP pmd Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190515083842.15116-4-xiaolong.ye@intel.com \
    --to=xiaolong.ye@intel.com \
    --cc=bjorn.topel@intel.com \
    --cc=dev@dpdk.org \
    --cc=john.mcnamara@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=marko.kovacevic@intel.com \
    --cc=qi.z.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK-dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dpdk-dev/0 dpdk-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dpdk-dev dpdk-dev/ https://lore.kernel.org/dpdk-dev \
		dev@dpdk.org dpdk-dev@archiver.kernel.org
	public-inbox-index dpdk-dev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox