All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qi Zhang <qi.z.zhang@intel.com>
To: dev@dpdk.org
Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com,
	Qi Zhang <qi.z.zhang@intel.com>
Subject: [RFC v2 5/7] net/af_xdp: enable share mempool
Date: Thu,  8 Mar 2018 21:52:47 +0800	[thread overview]
Message-ID: <20180308135249.28187-6-qi.z.zhang@intel.com> (raw)
In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com>

Check whether the external mempool (from rx_queue_setup) is suitable for
af_xdp. If it is, register it with the af_xdp socket directly so that
no packet data is copied on Rx or Tx.

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 193 +++++++++++++++++++++++-------------
 1 file changed, 126 insertions(+), 67 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 65c4c37bf..7e839f0da 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -56,7 +56,6 @@ struct xdp_umem {
 	unsigned int frame_size;
 	unsigned int frame_size_log2;
 	unsigned int nframes;
-	int mr_fd;
 	struct rte_mempool *mb_pool;
 };
 
@@ -69,6 +68,7 @@ struct pmd_internals {
 	struct xdp_queue tx;
 	struct xdp_umem *umem;
 	struct rte_mempool *ext_mb_pool;
+	uint8_t share_mb_pool;
 
 	unsigned long rx_pkts;
 	unsigned long rx_bytes;
@@ -159,20 +159,30 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		char *pkt;
 		uint32_t idx = descs[i].idx;
 
-		mbuf = rte_pktmbuf_alloc(internals->ext_mb_pool);
-		rte_pktmbuf_pkt_len(mbuf) =
-			rte_pktmbuf_data_len(mbuf) =
-			descs[i].len;
-		if (mbuf) {
-			pkt = get_pkt_data(internals, idx, descs[i].offset);
-			memcpy(rte_pktmbuf_mtod(mbuf, void *),
-			       pkt, descs[i].len);
-			rx_bytes += descs[i].len;
-			bufs[count++] = mbuf;
+		if (!internals->share_mb_pool) {
+			mbuf = rte_pktmbuf_alloc(internals->ext_mb_pool);
+			rte_pktmbuf_pkt_len(mbuf) =
+				rte_pktmbuf_data_len(mbuf) =
+				descs[i].len;
+			if (mbuf) {
+				pkt = get_pkt_data(internals, idx,
+						   descs[i].offset);
+				memcpy(rte_pktmbuf_mtod(mbuf, void *), pkt,
+				       descs[i].len);
+				rx_bytes += descs[i].len;
+				bufs[count++] = mbuf;
+			} else {
+				dropped++;
+			}
+			rte_pktmbuf_free(idx_to_mbuf(internals, idx));
 		} else {
-			dropped++;
+			mbuf = idx_to_mbuf(internals, idx);
+			rte_pktmbuf_pkt_len(mbuf) =
+				rte_pktmbuf_data_len(mbuf) =
+				descs[i].len;
+			bufs[count++] = mbuf;
+			rx_bytes += descs[i].len;
 		}
-		rte_pktmbuf_free(idx_to_mbuf(internals, idx));
 	}
 
 	internals->rx_pkts += (rcvd - dropped);
@@ -206,52 +216,72 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	uint16_t i, valid;
 	unsigned long tx_bytes = 0;
 	int ret;
+	uint8_t share_mempool = 0;
 
 	nb_pkts = nb_pkts < ETH_AF_XDP_TX_BATCH_SIZE ?
 		  nb_pkts : ETH_AF_XDP_TX_BATCH_SIZE;
 
 	if (txq->num_free < ETH_AF_XDP_TX_BATCH_SIZE * 2) {
 		int n = xq_deq(txq, descs, ETH_AF_XDP_TX_BATCH_SIZE);
-
 		for (i = 0; i < n; i++)
 			rte_pktmbuf_free(idx_to_mbuf(internals, descs[i].idx));
 	}
 
 	nb_pkts = nb_pkts > txq->num_free ? txq->num_free : nb_pkts;
-	ret = rte_mempool_get_bulk(internals->umem->mb_pool,
-				   (void *)mbufs,
-				   nb_pkts);
-	if (ret)
+	if (nb_pkts == 0)
 		return 0;
 
+	if (bufs[0]->pool == internals->ext_mb_pool && internals->share_mb_pool)
+		share_mempool = 1;
+
+	if (!share_mempool) {
+		ret = rte_mempool_get_bulk(internals->umem->mb_pool,
+					   (void *)mbufs,
+					   nb_pkts);
+		if (ret)
+			return 0;
+	}
+
 	valid = 0;
 	for (i = 0; i < nb_pkts; i++) {
 		char *pkt;
-		unsigned int buf_len =
-			internals->umem->frame_size - ETH_AF_XDP_DATA_HEADROOM;
 		mbuf = bufs[i];
-		if (mbuf->pkt_len <= buf_len) {
-			descs[valid].idx = mbuf_to_idx(internals, mbufs[i]);
-			descs[valid].offset = ETH_AF_XDP_DATA_HEADROOM;
-			descs[valid].flags = 0;
-			descs[valid].len = mbuf->pkt_len;
-			pkt = get_pkt_data(internals, descs[i].idx,
-					   descs[i].offset);
-			memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					   descs[i].len);
-			valid++;
+		if (!share_mempool) {
+			if (mbuf->pkt_len <=
+				(internals->umem->frame_size -
+				 ETH_AF_XDP_DATA_HEADROOM)) {
+				descs[valid].idx =
+					mbuf_to_idx(internals, mbufs[i]);
+				descs[valid].offset = ETH_AF_XDP_DATA_HEADROOM;
+				descs[valid].flags = 0;
+				descs[valid].len = mbuf->pkt_len;
+				pkt = get_pkt_data(internals, descs[i].idx,
+						   descs[i].offset);
+				memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+				       descs[i].len);
+				valid++;
+				tx_bytes += mbuf->pkt_len;
+			}
+			/* packet will be consumed anyway */
+			rte_pktmbuf_free(mbuf);
+		} else {
+			descs[i].idx = mbuf_to_idx(internals, mbuf);
+			descs[i].offset = ETH_AF_XDP_DATA_HEADROOM;
+			descs[i].flags = 0;
+			descs[i].len = mbuf->pkt_len;
 			tx_bytes += mbuf->pkt_len;
+			valid++;
 		}
-		/* packet will be consumed anyway */
-		rte_pktmbuf_free(mbuf);
 	}
 
 	xq_enq(txq, descs, valid);
 	kick_tx(internals->sfd);
 
-	if (valid < nb_pkts) {
-		for (i = valid; i < nb_pkts; i++)
-			rte_pktmbuf_free(mbufs[i]);
+	if (!share_mempool) {
+		if (valid < nb_pkts) {
+			for (i = valid; i < nb_pkts; i++)
+				rte_pktmbuf_free(mbufs[i]);
+		}
 	}
 
 	internals->err_pkts += (nb_pkts - valid);
@@ -376,46 +406,81 @@ static void *get_base_addr(struct rte_mempool *mb_pool)
 	return NULL;
 }
 
-static struct xdp_umem *xsk_alloc_and_mem_reg_buffers(int sfd,
-						      size_t nbuffers,
-						      const char *pool_name)
+static uint8_t
+check_mempool(struct rte_mempool *mp)
+{
+	RTE_ASSERT(mp);
+
+	/* memory must be contiguous: reject pools with more than one chunk */
+	if (mp->nb_mem_chunks > 1)
+		return 0;
+
+	/* object header must be exactly one cache line in size */
+	if (mp->header_size != RTE_CACHE_LINE_SIZE)
+		return 0;
+
+	/* base address must be aligned to the system page size */
+	if ((uint64_t)get_base_addr(mp) % getpagesize() != 0)
+		return 0;
+
+	/* header + element + trailer must be a multiple of the frame size */
+	if ((mp->elt_size + mp->header_size + mp->trailer_size) %
+			ETH_AF_XDP_FRAME_SIZE != 0)
+		return 0;
+
+	return 1;
+}
+
+static struct xdp_umem *
+xsk_alloc_and_mem_reg_buffers(struct pmd_internals *internals)
 {
 	struct xdp_mr_req req = { .frame_size = ETH_AF_XDP_FRAME_SIZE,
 				  .data_headroom = ETH_AF_XDP_DATA_HEADROOM };
+	char pool_name[0x100];
+	int nbuffers;
 	struct xdp_umem *umem = calloc(1, sizeof(*umem));
 
 	if (!umem)
 		return NULL;
 
-	umem->mb_pool =
-		rte_pktmbuf_pool_create_with_flags(
-			pool_name, nbuffers,
-			250, 0,
-			(ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_MBUF_OVERHEAD),
-			MEMPOOL_F_NO_SPREAD | MEMPOOL_F_PAGE_ALIGN,
-			SOCKET_ID_ANY);
-
-	if (!umem->mb_pool) {
-		free(umem);
-		return NULL;
-	}
+	internals->share_mb_pool = check_mempool(internals->ext_mb_pool);
+	if (!internals->share_mb_pool) {
+		snprintf(pool_name, 0x100, "%s_%s_%d", "af_xdp_pool",
+			 internals->if_name, internals->queue_idx);
+		umem->mb_pool =
+			rte_pktmbuf_pool_create_with_flags(
+				pool_name,
+				ETH_AF_XDP_NUM_BUFFERS,
+				250, 0,
+				(ETH_AF_XDP_FRAME_SIZE -
+				 ETH_AF_XDP_MBUF_OVERHEAD),
+				MEMPOOL_F_NO_SPREAD | MEMPOOL_F_PAGE_ALIGN,
+				SOCKET_ID_ANY);
+		if (!umem->mb_pool) {
+			free(umem);
+			return NULL;
+		}
 
-	if (umem->mb_pool->nb_mem_chunks > 1) {
-		rte_mempool_free(umem->mb_pool);
-		free(umem);
-		return NULL;
+		if (umem->mb_pool->nb_mem_chunks > 1) {
+			rte_mempool_free(umem->mb_pool);
+			free(umem);
+			return NULL;
+		}
+		nbuffers = ETH_AF_XDP_NUM_BUFFERS;
+	} else {
+		umem->mb_pool = internals->ext_mb_pool;
+		nbuffers = umem->mb_pool->populated_size;
 	}
 
 	req.addr = (uint64_t)get_base_addr(umem->mb_pool);
-	req.len = nbuffers * req.frame_size;
-	setsockopt(sfd, SOL_XDP, XDP_MEM_REG, &req, sizeof(req));
+	req.len = ETH_AF_XDP_NUM_BUFFERS * req.frame_size;
+	setsockopt(internals->sfd, SOL_XDP, XDP_MEM_REG, &req, sizeof(req));
 
 	umem->frame_size = ETH_AF_XDP_FRAME_SIZE;
 	umem->frame_size_log2 = 11;
 	umem->buffer = (char *)req.addr;
 	umem->size = nbuffers * req.frame_size;
 	umem->nframes = nbuffers;
-	umem->mr_fd = sfd;
 
 	return umem;
 }
@@ -425,19 +490,13 @@ xdp_configure(struct pmd_internals *internals)
 {
 	struct sockaddr_xdp sxdp;
 	struct xdp_ring_req req;
-	char pool_name[0x100];
-
 	int ret = 0;
 
-	snprintf(pool_name, 0x100, "%s_%s_%d", "af_xdp_pool",
-		 internals->if_name, internals->queue_idx);
-	internals->umem = xsk_alloc_and_mem_reg_buffers(internals->sfd,
-							ETH_AF_XDP_NUM_BUFFERS,
-							pool_name);
+	internals->umem = xsk_alloc_and_mem_reg_buffers(internals);
 	if (!internals->umem)
 		return -1;
 
-	req.mr_fd = internals->umem->mr_fd;
+	req.mr_fd = internals->sfd;
 	req.desc_nr = internals->ring_size;
 
 	ret = setsockopt(internals->sfd, SOL_XDP, XDP_RX_RING,
@@ -498,7 +557,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		RTE_PKTMBUF_HEADROOM;
 	data_size = internals->umem->frame_size;
 
-	if (data_size > buf_size) {
+	if (data_size - ETH_AF_XDP_DATA_HEADROOM > buf_size) {
 		RTE_LOG(ERR, PMD,
 			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
 			dev->device->name, data_size, buf_size);
@@ -764,7 +823,7 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 
 	internals = eth_dev->data->dev_private;
 	if (internals->umem) {
-		if (internals->umem->mb_pool)
+		if (internals->umem->mb_pool && !internals->share_mb_pool)
 			rte_mempool_free(internals->umem->mb_pool);
 		rte_free(internals->umem);
 	}
-- 
2.13.6

  parent reply	other threads:[~2018-03-08 13:52 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-08 13:52 [RFC v2 0/7] PMD driver for AF_XDP Qi Zhang
2018-03-08 13:52 ` [RFC v2 1/7] net/af_xdp: new PMD driver Qi Zhang
2018-03-08 13:52 ` [RFC v2 2/7] lib/mbuf: enable parse flags when create mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 3/7] lib/mempool: allow page size aligned mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 4/7] net/af_xdp: use mbuf mempool for buffer management Qi Zhang
2018-03-08 13:52 ` Qi Zhang [this message]
2018-03-08 13:52 ` [RFC v2 6/7] net/af_xdp: load BPF file Qi Zhang
2018-03-08 14:20   ` Zhang, Qi Z
2018-03-08 23:15   ` Stephen Hemminger
2018-05-09  7:02     ` Björn Töpel
2018-03-08 13:52 ` [RFC v2 7/7] app/testpmd: enable parameter for mempool flags Qi Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180308135249.28187-6-qi.z.zhang@intel.com \
    --to=qi.z.zhang@intel.com \
    --cc=bjorn.topel@intel.com \
    --cc=dev@dpdk.org \
    --cc=magnus.karlsson@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.