From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by smtp.lore.kernel.org (Postfix) with ESMTP id CA88EC433F5 for ; Fri, 24 Dec 2021 16:46:28 +0000 (UTC) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 2F8C6410FC; Fri, 24 Dec 2021 17:46:25 +0100 (CET) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by mails.dpdk.org (Postfix) with ESMTP id 7D0CD410F7 for ; Fri, 24 Dec 2021 17:46:23 +0100 (CET) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id F1F651FB; Fri, 24 Dec 2021 08:46:22 -0800 (PST) Received: from net-x86-dell-8268.shanghai.arm.com (net-x86-dell-8268.shanghai.arm.com [10.169.210.111]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id DEFA43F5A1; Fri, 24 Dec 2021 08:46:20 -0800 (PST) From: Feifei Wang To: Beilei Xing , Ruifeng Wang Cc: dev@dpdk.org, nd@arm.com, Feifei Wang , Honnappa Nagarahalli Subject: [RFC PATCH v1 1/4] net/i40e: enable direct re-arm mode Date: Sat, 25 Dec 2021 00:46:09 +0800 Message-Id: <20211224164613.32569-2-feifei.wang2@arm.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20211224164613.32569-1-feifei.wang2@arm.com> References: <20211224164613.32569-1-feifei.wang2@arm.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org For i40e driver, enable direct re-arm mode. This patch supports the case of mapping Rx/Tx queues from the same single lcore. Suggested-by: Honnappa Nagarahalli Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- drivers/net/i40e/i40e_rxtx.h | 4 + drivers/net/i40e/i40e_rxtx_vec_neon.c | 149 +++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index 5e6eecc501..1fdf4305f4 100644 --- a/drivers/net/i40e/i40e_rxtx.h +++ b/drivers/net/i40e/i40e_rxtx.h @@ -102,6 +102,8 @@ struct i40e_rx_queue { uint16_t rxrearm_nb; /**< number of remaining to be re-armed */ uint16_t rxrearm_start; /**< the idx we start the re-arming from */ + uint16_t direct_rxrearm_port; /** device TX port ID for direct re-arm mode */ + uint16_t direct_rxrearm_queue; /** TX queue index for direct re-arm mode */ uint64_t mbuf_initializer; /**< value to init mbufs */ uint16_t port_id; /**< device port ID */ @@ -121,6 +123,8 @@ struct i40e_rx_queue { uint16_t rx_using_sse; /**qrx_tail, rx_id); } +static inline void +i40e_rxq_rearm_direct_single(struct i40e_rx_queue *rxq) +{ + struct rte_eth_dev *dev; + struct i40e_tx_queue *txq; + volatile union i40e_rx_desc *rxdp; + struct i40e_tx_entry *txep; + struct i40e_rx_entry *rxep; + uint16_t tx_port_id, tx_queue_id; + uint16_t rx_id; + struct rte_mbuf *mb0, *mb1, *m; + uint64x2_t dma_addr0, dma_addr1; + uint64x2_t zero = vdupq_n_u64(0); + uint64_t paddr; + uint16_t i, n; + uint16_t nb_rearm = 0; + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + rxep = &rxq->sw_ring[rxq->rxrearm_start]; + + tx_port_id = rxq->direct_rxrearm_port; + tx_queue_id = rxq->direct_rxrearm_queue; + dev = &rte_eth_devices[tx_port_id]; + txq = dev->data->tx_queues[tx_queue_id]; + + /* tx_rs_thresh must be equal to + * RTE_I40E_RXQ_REARM_THRESH in + * direct re-arm mode due to + * tx_next_dd update based on the + * number of free buffers in the + * next time + */ + n = RTE_I40E_RXQ_REARM_THRESH; + + if (txq->nb_tx_free < txq->tx_free_thresh) { + /* check DD bits on threshold descriptor */ + if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & + rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != + rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) { + goto mempool_bulk; + } + + /* first buffer to free from S/W ring is at index + * tx_next_dd - (tx_rs_thresh-1) + */ + txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; + + if (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) { + /* directly put mbufs from Tx to Rx, + * and initialize the mbufs in vector, + * process 2 mbufs in one loop + */ + for (i = 0; i < n; i += 2, rxep += 2, txep += 2) { + rxep[0].mbuf = txep[0].mbuf; + rxep[1].mbuf = txep[1].mbuf; + + /* Initialize rxdp descs */ + mb0 = txep[0].mbuf; + mb1 = txep[1].mbuf; + + paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM; + dma_addr0 = vdupq_n_u64(paddr); + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); + + paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM; + dma_addr1 = vdupq_n_u64(paddr); + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1); + } + } else { + for (i = 0; i < n; i++) { + m = rte_pktmbuf_prefree_seg(txep[i].mbuf); + if (m != NULL) { + rxep[i].mbuf = m; + + /* Initialize rxdp descs */ + paddr = m->buf_iova + RTE_PKTMBUF_HEADROOM; + dma_addr0 = vdupq_n_u64(paddr); + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); + nb_rearm++; + } + } + n = nb_rearm; + } + + /* update counters for Tx */ + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + RTE_I40E_RXQ_REARM_THRESH); + txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + RTE_I40E_RXQ_REARM_THRESH); + if (txq->tx_next_dd >= txq->nb_tx_desc) + txq->tx_next_dd = (uint16_t)(RTE_I40E_RXQ_REARM_THRESH - 1); + } else { +mempool_bulk: + /* if TX did not free bufs into Rx sw-ring, + * get new bufs from mempool + */ + if (unlikely(rte_mempool_get_bulk(rxq->mp, (void *)rxep, n) < 0)) { + if (rxq->rxrearm_nb + n >= rxq->nb_rx_desc) { + for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { + rxep[i].mbuf = &rxq->fake_mbuf; + vst1q_u64((uint64_t *)&rxdp[i].read, zero); + } + } + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += n; + return; + } + + /* Initialize the mbufs in vector, process 2 mbufs in one loop */ + for (i = 0; i < n; i += 2, rxep += 2) { + mb0 = rxep[0].mbuf; + mb1 = rxep[1].mbuf; + + paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM; + dma_addr0 = vdupq_n_u64(paddr); + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); + + paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM; + dma_addr1 = vdupq_n_u64(paddr); + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1); + } + } + + /* Update the descriptor initializer index */ + rxq->rxrearm_start += n; + rx_id = rxq->rxrearm_start - 1; + + if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) { + rxq->rxrearm_start = 0; + rx_id = rxq->nb_rx_desc - 1; + } + + rxq->rxrearm_nb -= n; + + rte_io_wmb(); + /* Update the tail pointer on the NIC */ + I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); +} + static inline void desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4], struct rte_mbuf **rx_pkts) @@ -244,8 +385,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq, /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act */ - if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) - i40e_rxq_rearm(rxq); + if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) { + if (rxq->direct_rxrearm_enable) + i40e_rxq_rearm_direct_single(rxq); + else + i40e_rxq_rearm(rxq); + } /* Before we start moving massive data around, check to see if * there is actually a packet available -- 2.25.1