* [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD
@ 2020-03-16  7:45 Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option Leyi Rong
                   ` (16 more replies)
  0 siblings, 17 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enables the framework for the advanced iAVF PMD,
including querying DDP package info, flexible Rx descriptor
support, FDIR mark ID and RSS hash support.

Leyi Rong (12):
  net/iavf: remove 16B Rx descriptor compile option
  net/iavf: return error if opcode is mismatched
  net/iavf: support to query DDP package info
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 config/common_base                    |   1 -
 drivers/net/iavf/iavf.h               |  15 +
 drivers/net/iavf/iavf_ethdev.c        |  15 +
 drivers/net/iavf/iavf_rxtx.c          | 570 ++++++++++++++++-
 drivers/net/iavf/iavf_rxtx.h          |  62 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 856 ++++++++++++++++++++++----
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 514 ++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  86 ++-
 8 files changed, 1982 insertions(+), 137 deletions(-)

-- 
2.17.1



* [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Remove CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC, as the 16-byte
Rx descriptor is not supported by the ice PF host driver.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 config/common_base                    |   1 -
 drivers/net/iavf/iavf_rxtx.c          |   2 -
 drivers/net/iavf/iavf_rxtx.h          |  14 +--
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 148 +++++++-------------------
 4 files changed, 42 insertions(+), 123 deletions(-)

diff --git a/config/common_base b/config/common_base
index c31175f9d..eea53cb35 100644
--- a/config/common_base
+++ b/config/common_base
@@ -346,7 +346,6 @@ CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX=n
 CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX_FREE=n
 CONFIG_RTE_LIBRTE_IAVF_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC=n
-CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC=n
 #
 # Compile burst-oriented IPN3KE PMD driver
 #
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..fbb18a713 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -230,10 +230,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
 		rxd = &rxq->rx_ring[i];
 		rxd->read.pkt_addr = dma_addr;
 		rxd->read.hdr_addr = 0;
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		rxd->read.rsvd1 = 0;
 		rxd->read.rsvd2 = 0;
-#endif
 
 		rxq->sw_ring[i] = mbuf;
 	}
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..416433504 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -57,12 +57,8 @@
 #define IAVF_TX_OFFLOAD_NOTSUP_MASK \
 		(PKT_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK)
 
-/* HW desc structure, both 16-byte and 32-byte types are supported */
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-#define iavf_rx_desc iavf_16byte_rx_desc
-#else
+/* HW desc structure, only 32-byte type is supported */
 #define iavf_rx_desc iavf_32byte_rx_desc
-#endif
 
 struct iavf_rxq_ops {
 	void (*release_mbufs)(struct iavf_rx_queue *rxq);
@@ -224,20 +220,12 @@ void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq,
 			    const volatile void *desc,
 			    uint16_t rx_id)
 {
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-	const volatile union iavf_16byte_rx_desc *rx_desc = desc;
-
-	printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n",
-	       rxq->queue_id, rx_id, rx_desc->read.pkt_addr,
-	       rx_desc->read.hdr_addr);
-#else
 	const volatile union iavf_32byte_rx_desc *rx_desc = desc;
 
 	printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64
 	       " QW2: 0x%016"PRIx64" QW3: 0x%016"PRIx64"\n", rxq->queue_id,
 	       rx_id, rx_desc->read.pkt_addr, rx_desc->read.hdr_addr,
 	       rx_desc->read.rsvd1, rx_desc->read.rsvd2);
-#endif
 }
 
 /* All the descriptors are 16 bytes, so just use one of them
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..4e1231162 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -40,7 +40,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		return;
 	}
 
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	struct rte_mbuf *mb0, *mb1;
 	__m128i dma_addr0, dma_addr1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
@@ -70,54 +69,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq)
 		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
 		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
 	}
-#else
-	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
-	__m256i dma_addr0_1, dma_addr2_3;
-	__m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
-	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
-	for (i = 0; i < IAVF_RXQ_REARM_THRESH;
-			i += 4, rxp += 4, rxdp += 4) {
-		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
-		__m256i vaddr0_1, vaddr2_3;
-
-		mb0 = rxp[0];
-		mb1 = rxp[1];
-		mb2 = rxp[2];
-		mb3 = rxp[3];
-
-		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
-		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
-				offsetof(struct rte_mbuf, buf_addr) + 8);
-		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
-		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
-		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
-		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
-
-		/**
-		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
-		 * into the high lanes. Similarly for 2 & 3
-		 */
-		vaddr0_1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
-						vaddr1, 1);
-		vaddr2_3 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
-						vaddr3, 1);
-
-		/* convert pa to dma_addr hdr/data */
-		dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
-		dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);
-
-		/* add headroom to pa values */
-		dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
-		dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);
-
-		/* flush desc with pa dma_addr */
-		_mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
-		_mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
-	}
-
-#endif
 
 	rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
@@ -149,7 +100,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	/* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */
 	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
 	volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail;
-	const int avx_aligned = ((rxq->rx_tail & 1) == 0);
 
 	rte_prefetch0(rxdp);
 
@@ -292,8 +242,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
 				   PKT_RX_EIP_CKSUM_BAD);
 
-	RTE_SET_USED(avx_aligned); /* for 32B descriptors we don't use this */
-
 	uint16_t i, received;
 
 	for (i = 0, received = 0; i < nb_pkts;
@@ -309,61 +257,47 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 #endif
 
 		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-		/* for AVX we need alignment otherwise loads are not atomic */
-		if (avx_aligned) {
-			/* load in descriptors, 2 at a time, in reverse order */
-			raw_desc6_7 = _mm256_load_si256((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			raw_desc4_5 = _mm256_load_si256((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			raw_desc2_3 = _mm256_load_si256((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			raw_desc0_1 = _mm256_load_si256((void *)(rxdp + 0));
-		} else
-#endif
-		{
-			const __m128i raw_desc7 =
-				_mm_load_si128((void *)(rxdp + 7));
-			rte_compiler_barrier();
-			const __m128i raw_desc6 =
-				_mm_load_si128((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			const __m128i raw_desc5 =
-				_mm_load_si128((void *)(rxdp + 5));
-			rte_compiler_barrier();
-			const __m128i raw_desc4 =
-				_mm_load_si128((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			const __m128i raw_desc3 =
-				_mm_load_si128((void *)(rxdp + 3));
-			rte_compiler_barrier();
-			const __m128i raw_desc2 =
-				_mm_load_si128((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			const __m128i raw_desc1 =
-				_mm_load_si128((void *)(rxdp + 1));
-			rte_compiler_barrier();
-			const __m128i raw_desc0 =
-				_mm_load_si128((void *)(rxdp + 0));
-
-			raw_desc6_7 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc6),
-					 raw_desc7, 1);
-			raw_desc4_5 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc4),
-					 raw_desc5, 1);
-			raw_desc2_3 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc2),
-					 raw_desc3, 1);
-			raw_desc0_1 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc0),
-					 raw_desc1, 1);
-		}
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
 
 		if (split_packet) {
 			int j;
-- 
2.17.1



* [dpdk-dev] [PATCH 02/12] net/iavf: return error if opcode is mismatched
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 03/12] net/iavf: support to query DDP package info Leyi Rong
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Return an error when the opcode of a message read from the
adminQ does not match the pending command.

Fixes: 22b123a36d07 ("net/avf: initialize PMD")

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
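Illustration (not part of this patch): a minimal, self-contained model
of the control-flow change. A stale or foreign adminQ response now
yields an error instead of falling through to success. The error value
below is made up; the real IAVF_ERR_OPCODE_MISMATCH comes from the
shared iavf base code.

	#include <stdint.h>
	#include <stdio.h>

	#define TOY_SUCCESS			0
	#define TOY_ERR_OPCODE_MISMATCH	(-63)	/* assumed value */

	/* models the tail of iavf_read_msg_from_pf() after this patch */
	static int
	check_opcode(uint32_t opcode, uint32_t pend_cmd)
	{
		if (opcode != pend_cmd) {
			fprintf(stderr,
				"command mismatch, expect %u, get %u\n",
				pend_cmd, opcode);
			/* new with this patch: report the mismatch */
			return TOY_ERR_OPCODE_MISMATCH;
		}
		return TOY_SUCCESS;
	}

	int main(void)
	{
		/* expected opcode 10, but the adminQ delivered 42 */
		return check_opcode(42, 10) == TOY_ERR_OPCODE_MISMATCH ?
		       0 : 1;
	}
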
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1



* [dpdk-dev] [PATCH 03/12] net/iavf: support to query DDP package info
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Advanced iAVF supports querying DDP package info, including the
package version, track ID, package name, device serial number
and the list of protocols that the PF supports.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
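Illustration (not part of this patch): a self-contained sketch that
decodes the proto_metadata bitmap using the IAVF_COMMS_PROTO_* bits
this patch adds to iavf.h (mirrored below with ULL suffixes); the
sample bitmap value is made up.

	#include <stdint.h>
	#include <stdio.h>

	#define IAVF_COMMS_PROTO_GTP		0x0000000000000001ULL
	#define IAVF_COMMS_PROTO_PPPOE		0x0000000000000002ULL
	#define IAVF_COMMS_PROTO_PFCP		0x0000000000000004ULL
	#define IAVF_COMMS_PROTO_L2TPV3	0x0000000000000008ULL
	#define IAVF_COMMS_PROTO_ESP		0x0000000000000010ULL

	int main(void)
	{
		/* sample value; the real bitmap is read from
		 * vf->pkg_info.proto_metadata after OP_PACKAGE_INFO
		 */
		uint64_t proto_metadata = IAVF_COMMS_PROTO_GTP |
					  IAVF_COMMS_PROTO_ESP;

		if (proto_metadata & IAVF_COMMS_PROTO_GTP)
			printf("DDP package supports GTP\n");
		if (proto_metadata & IAVF_COMMS_PROTO_PFCP)
			printf("DDP package supports PFCP\n");
		if (proto_metadata & IAVF_COMMS_PROTO_ESP)
			printf("DDP package supports ESP\n");
		return 0;
	}
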
 drivers/net/iavf/iavf.h        |  9 ++++++++
 drivers/net/iavf/iavf_ethdev.c |  7 ++++++
 drivers/net/iavf/iavf_vchnl.c  | 41 +++++++++++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 526040c6e..39c6eeec9 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,13 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+/* VF supported comms protocols 64-bits bitmap */
+#define IAVF_COMMS_PROTO_GTP            0x0000000000000001
+#define IAVF_COMMS_PROTO_PPPOE          0x0000000000000002
+#define IAVF_COMMS_PROTO_PFCP           0x0000000000000004
+#define IAVF_COMMS_PROTO_L2TPV3         0x0000000000000008
+#define IAVF_COMMS_PROTO_ESP            0x0000000000000010
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -97,6 +104,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	struct virtchnl_pkg_info pkg_info; /* package info */
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -225,6 +233,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_query_package_info(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index ee9f82249..9c8f789da 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1236,6 +1236,13 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QUERY_DDP) {
+		if (iavf_query_package_info(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "iavf_query_package_info failed");
+			goto err_rss;
+		}
+	}
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..288d34e8b 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_PACKAGE_INFO:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_QUERY_DDP;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -586,6 +588,43 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 	return err;
 }
 
+int
+iavf_query_package_info(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_PACKAGE_INFO;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_PACKAGE_INFO");
+		return ret;
+	}
+
+	rte_memcpy(&vf->pkg_info, args.out_buffer,
+		   sizeof(struct virtchnl_pkg_info));
+	PMD_DRV_LOG(NOTICE, "pkg version is %d.%d.%d.%d, pkg name is %s,"
+		    " track id is %x, serial number is %02x%02x%02x%02x"
+		    "%02x%02x%02x%02x, proto_metadata is 0x%016" PRIx64 "\n",
+		    vf->pkg_info.p_ver.major, vf->pkg_info.p_ver.minor,
+		    vf->pkg_info.p_ver.update, vf->pkg_info.p_ver.draft,
+		    vf->pkg_info.pkg_name, vf->pkg_info.track_id,
+		    vf->pkg_info.dsn[7], vf->pkg_info.dsn[6],
+		    vf->pkg_info.dsn[5], vf->pkg_info.dsn[4],
+		    vf->pkg_info.dsn[3], vf->pkg_info.dsn[2],
+		    vf->pkg_info.dsn[1], vf->pkg_info.dsn[0],
+		    vf->pkg_info.proto_metadata);
+
+	return 0;
+}
+
 int
 iavf_config_irq_map(struct iavf_adapter *adapter)
 {
-- 
2.17.1



* [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (2 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 03/12] net/iavf: support to query DDP package info Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-26  6:50   ` Wu, Jingjing
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the normal
path of the iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
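Illustration (not part of this patch): the RXDID selection done in
iavf_configure_queues() reduces to the hypothetical helper sketched
below. The numeric descriptor IDs are assumptions; the real values
come from the base-code definitions of IAVF_RXDID_LEGACY_1 and
IAVF_RXDID_COMMS_OVS_1.

	#include <stdbool.h>
	#include <stdint.h>

	#define IAVF_RXDID_LEGACY_1	1	/* assumed value */
	#define IAVF_RXDID_COMMS_OVS_1	22	/* assumed value */

	/* Request the flex OVS descriptor only when the VF negotiated
	 * VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC and the PF reports that
	 * profile in its supported_rxdid bitmap; otherwise fall back
	 * to the 32-byte legacy descriptor.
	 */
	static uint8_t
	iavf_select_rxdid(bool flex_desc_cap, uint64_t supported_rxdid)
	{
		if (flex_desc_cap &&
		    (supported_rxdid & (1ULL << IAVF_RXDID_COMMS_OVS_1)))
			return IAVF_RXDID_COMMS_OVS_1;
		return IAVF_RXDID_LEGACY_1;
	}

	int main(void)
	{
		uint64_t supported = 1ULL << IAVF_RXDID_COMMS_OVS_1;

		return iavf_select_rxdid(true, supported) ==
		       IAVF_RXDID_COMMS_OVS_1 ? 0 : 1;
	}
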
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 507 ++++++++++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx.h   |  11 +
 drivers/net/iavf/iavf_vchnl.c  |  43 ++-
 5 files changed, 567 insertions(+), 4 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 39c6eeec9..4fe15237a 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -105,6 +105,7 @@ struct iavf_info {
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
 	struct virtchnl_pkg_info pkg_info; /* package info */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -233,6 +234,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_query_package_info(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 9c8f789da..f7b2562c1 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1243,6 +1243,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index fbb18a713..b9b35bdbb 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -718,6 +718,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_64(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -752,6 +766,63 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+/* Translate the rx flex descriptor status to pkt fields */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	stat_err = rte_le_to_cpu_16(desc->status_error0);
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+#endif
+}
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -873,6 +944,289 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	return nb_rx;
 }
 
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
+	}
+	rxq->rx_tail = rx_id;
+
+	/* If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the receive tail register of queue.
+	 * Update that register with the value of the last processed RX
+	 * descriptor minus 1.
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
+			   "nb_hold=%u nb_rx=%u",
+			   rxq->port_id, rxq->queue_id,
+			   rx_id, nb_hold, nb_rx);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	/* If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+	 * register. Update the RDT with the value of the last processed RX
+	 * descriptor minus 1, to guarantee that the RDT register is never
+	 * equal to the RDH register, which creates a "full" ring situation
+	 * from the hardware point of view.
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
+			   "nb_hold=%u nb_rx=%u",
+			   rxq->port_id, rxq->queue_id,
+			   rx_id, nb_hold, nb_rx);
+		rx_id = (uint16_t)(rx_id == 0 ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+
+	return nb_rx;
+}
+
 /* implement recv_scattered_pkts  */
 uint16_t
 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -1049,6 +1403,82 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 }
 
 #define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
+	 */
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
+	}
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
+
+	return nb_rx;
+}
+
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1217,7 +1647,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1661,6 +2094,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1700,7 +2134,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1708,7 +2145,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1784,6 +2224,35 @@ iavf_dev_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
 }
 
+/* Get the number of used descriptors of a rx queue for flexible RXD */
+uint32_t
+iavf_dev_rxq_count_flex_rxd(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+#define IAVF_RXQ_SCAN_INTERVAL 4
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	uint16_t desc = 0;
+
+	rxq = dev->data->rx_queues[queue_id];
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	while ((desc < rxq->nb_rx_desc) &&
+	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
+	       (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)) {
+		/* Check the DD bit of one Rx descriptor in each group
+		 * of 4, to avoid polling the ring too frequently and
+		 * degrading performance.
+		 */
+		desc += IAVF_RXQ_SCAN_INTERVAL;
+		rxdp += IAVF_RXQ_SCAN_INTERVAL;
+		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
+			rxdp = (volatile union iavf_rx_flex_desc *)
+				&(rxq->rx_ring[rxq->rx_tail +
+					desc - rxq->nb_rx_desc]);
+	}
+
+	return desc;
+}
+
 /* Get the number of used descriptors of a rx queue */
 uint32_t
 iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
@@ -1795,6 +2264,10 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		return iavf_dev_rxq_count_flex_rxd(dev, queue_id);
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
@@ -1813,6 +2286,31 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 	return desc;
 }
 
+int
+iavf_dev_rx_desc_status_flex_rxd(void *rx_queue, uint16_t offset)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint32_t desc;
+
+	if (unlikely(offset >= rxq->nb_rx_desc))
+		return -EINVAL;
+
+	if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
+		return RTE_ETH_RX_DESC_UNAVAIL;
+
+	desc = rxq->rx_tail + offset;
+	if (desc >= rxq->nb_rx_desc)
+		desc -= rxq->nb_rx_desc;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[desc];
+	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))
+		return RTE_ETH_RX_DESC_DONE;
+
+	return RTE_ETH_RX_DESC_AVAIL;
+}
+
 int
 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 {
@@ -1821,6 +2319,9 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
 	uint64_t mask;
 	uint32_t desc;
 
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		return iavf_dev_rx_desc_status_flex_rxd(rx_queue, offset);
+
 	if (unlikely(offset >= rxq->nb_rx_desc))
 		return -EINVAL;
 
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 416433504..ee306d400 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -59,6 +59,7 @@
 
 /* HW desc structure, only 32-byte type is supported */
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 
 struct iavf_rxq_ops {
 	void (*release_mbufs)(struct iavf_rx_queue *rxq);
@@ -83,6 +84,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -175,9 +177,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -189,7 +197,10 @@ void iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 void iavf_dev_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 			  struct rte_eth_txq_info *qinfo);
 uint32_t iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id);
+uint32_t iavf_dev_rxq_count_flex_rxd(struct rte_eth_dev *dev,
+				     uint16_t queue_id);
 int iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset);
+int iavf_dev_rx_desc_status_flex_rxd(void *rx_queue, uint16_t offset);
 int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 288d34e8b..f0c283472 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -89,6 +89,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
 	case VIRTCHNL_OP_PACKAGE_INFO:
+	case VIRTCHNL_OP_RXDID:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -340,7 +341,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 */
 
 	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
-		VIRTCHNL_VF_OFFLOAD_QUERY_DDP;
+		VIRTCHNL_VF_OFFLOAD_QUERY_DDP |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -377,6 +379,31 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_RXDID;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_RXDID");
+		return ret;
+	}
+
+	vf->supported_rxdid = *(uint64_t *)args.out_buffer;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -569,6 +596,20 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				rxq[i]->rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				rxq[i]->rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
 		}
 	}
 
-- 
2.17.1



* [dpdk-dev] [PATCH 05/12] net/iavf: flexible Rx descriptor support in AVX path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (3 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the AVX
path of the iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
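Illustration (not part of this patch): a scalar sketch of what the new
vector code computes per descriptor when extracting packet types. In
the flex descriptor the ptype index sits in bits 16-25 of the first
128 bits, i.e. 16-bit word 1 masked by IAVF_RX_FLEX_DESC_PTYPE_M (the
0x3FF mask value below is an assumption).

	#include <stdint.h>

	#define IAVF_RX_FLEX_DESC_PTYPE_M	0x3FF	/* assumed mask */

	/* desc_words points at the 16-bit words of one flex descriptor;
	 * type_table is the driver's software ptype lookup table.
	 */
	static uint32_t
	flex_desc_ptype(const uint16_t *desc_words,
			const uint32_t *type_table)
	{
		return type_table[desc_words[1] &
				  IAVF_RX_FLEX_DESC_PTYPE_M];
	}

	int main(void)
	{
		uint32_t type_table[1024] = { 0 };	/* 2^10 entries */
		uint16_t desc_words[16] = { 0, 23 };	/* word 1: ptype 23 */

		return flex_desc_ptype(desc_words, type_table) == 0 ? 0 : 1;
	}

The AVX2 path does the same for eight descriptors at once: it ANDs the
raw descriptors with _mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M) and
then extracts word 1 (low lane) and word 9 (high lane) of each merged
256-bit descriptor pair.
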
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++-
 3 files changed, 570 insertions(+), 10 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index b9b35bdbb..57fffece9 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2115,16 +2115,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index ee306d400..de13fd516 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -212,9 +212,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 4e1231162..74f672c7e 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -11,14 +11,16 @@
 #endif
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	/* The legacy and flex descriptor read formats place pkt_addr and
+	 * hdr_addr at the same offsets, so the rearm writes below are
+	 * valid for either RXDID without converting rxdp.
+	 */
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -110,7 +112,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	 * of time to act
 	 */
 	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
-		iavf_rxq_rearm(rxq);
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
@@ -548,6 +550,465 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits. This gives us the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by result of flag mask, shifted down 12.
+	 * If RSS(bit12)/VLAN(bit13) are set,
+	 * shuffle moves appropriate flags in place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* end up 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types: the ptype is located in bits 16-25
+		 * of each 128-bit half
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types: the ptype is located in bits 16-25
+		 * of each 128-bit half
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge to one
+		 * After merge, the packets status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in mbuf_flags.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
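+		/* each mb register holds (hi 128b = odd desc, lo 128b = even
+		 * desc): the even mbufs below pair their flags with the low
+		 * half via a cross-lane permute, while the odd mbufs further
+		 * down can use a cheaper in-lane blend.
+		 */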
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit, i.e. !EOP rather than an EOP one.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
@@ -559,6 +1020,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -624,6 +1097,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 06/12] net/iavf: flexible Rx descriptor support in SSE path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (4 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value Leyi Rong
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in SSE
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 57fffece9..5d484d5e9 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2119,7 +2119,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2132,7 +2132,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index de13fd516..c85207dae 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -205,9 +205,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bit6:4 for checksum.
+	 * bit12 for RSS indication.
+	 * bit13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flag
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts has to be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packet descriptors in one loop
+	 * [A*. mask out 4 unused dirty fields in desc]
+	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf points */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* A.1 load desc[1] */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (5 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-26  6:57   ` Wu, Jingjing
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 08/12] net/iavf: support flow mark in normal data path Leyi Rong
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_adapter structure
to identify whether FDIR ID parsing is active. The Rx data path
benefits when FDIR ID parsing can be skipped, especially in the
vector path.
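
For illustration, a sketch of the intended usage; the rule create/destroy
call sites are not part of this patch, and the Rx-loop guard mirrors the
vector-path patches later in this series:

	/* control path: a flow rule that uses the FDIR mark takes a
	 * reference on create and drops it on destroy
	 */
	iavf_fdir_rx_proc_enable(adapter, true);   /* on rule create  */
	iavf_fdir_rx_proc_enable(adapter, false);  /* on rule destroy */

	/* data path: when no rule needs the mark, the per-packet FDIR ID
	 * parsing (extra descriptor reads and scalar stores) is skipped
	 */
	if (rxq->vsi->adapter->fdir_enabled) {
		/* ... parse flow_id from the descriptor ... */
	}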

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4fe15237a..1918a67f1 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -142,6 +142,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint8_t fdir_enabled;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index c85207dae..5548d1adb 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -281,6 +281,32 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	static uint32_t ref_cnt;
+
+	if (on) {
+		/* enable flow director processing */
+		if (ref_cnt++ == 0) {
+			ad->fdir_enabled = on;
+			PMD_DRV_LOG(DEBUG,
+				    "FDIR processing on RX set to %d", on);
+		}
+	} else {
+		if (ref_cnt >= 1) {
+			ref_cnt--;
+
+			if (ref_cnt == 0) {
+				ad->fdir_enabled = on;
+				PMD_DRV_LOG(DEBUG,
+					    "FDIR processing on RX set to %d", on);
+			}
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 08/12] net/iavf: support flow mark in normal data path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (6 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-03-16  7:45 ` Leyi Rong
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 09/12] net/iavf: support flow mark in AVX path Leyi Rong
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:45 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.
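
Two conventions are involved, one per descriptor format; a summary
sketch using the constants this patch adds or uses:

	/* legacy 32B descriptor: the FLM status bit signals a filter
	 * match, and the FLEXBH field must read FD_ID (0x01) before
	 * hash.fdir.hi may be taken from qword3
	 */
	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT)) ?
			PKT_RX_FDIR : 0;
	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID)
		flags |= PKT_RX_FDIR_ID;

	/* flex descriptor: no separate status bit; hardware writes the
	 * sentinel 0xFFFFFFFF into flow_id when there is no match
	 */
	if (desc->flow_id != 0xFFFFFFFF)
		flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;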

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 37 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 1918a67f1..533856f21 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -74,6 +74,9 @@
 #define IAVF_COMMS_PROTO_L2TPV3         0x0000000000000008
 #define IAVF_COMMS_PROTO_ESP            0x0000000000000010
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 5d484d5e9..d941249e3 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -746,6 +746,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -766,6 +770,25 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+
+	return flags;
+}
+
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -782,6 +805,11 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 #endif
+
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
 }
 
 #define IAVF_RX_FLEX_ERR0_BITS	\
@@ -917,6 +945,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1365,6 +1396,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1549,6 +1583,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 09/12] net/iavf: support flow mark in AVX path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (7 preceding siblings ...)
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 08/12] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-03-16  7:46 ` Leyi Rong
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 10/12] net/iavf: support flow mark in SSE path Leyi Rong
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:46 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.
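
The flag computation below is branch-free; a scalar rendering of one
32-bit lane shows the identity it relies on:

	/* cmpeq yields all-ones on mismatch (flow_id == 0xFFFFFFFF) and
	 * all-zeros on match; XOR with all-ones inverts that, so the AND
	 * keeps PKT_RX_FDIR | PKT_RX_FDIR_ID only for matching lanes
	 */
	uint32_t mask = (fdir_id == FDID_MIS_MAGIC) ? 0xFFFFFFFF : 0;
	mask ^= 0xFFFFFFFF;
	uint32_t flags = mask & (PKT_RX_FDIR | PKT_RX_FDIR_ID);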

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 82 +++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 74f672c7e..9bbf75632 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -550,6 +550,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* this XOR op inverts (bitwise NOT) the fdir_mask */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -612,8 +631,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -621,8 +640,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -864,8 +883,61 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->vsi->adapter->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			uint32_t fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 3);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 0]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 7);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 1]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 2);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 2]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 6);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 3]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 1);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 4]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 5);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 5]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 0);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 6]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 4);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr;
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 10/12] net/iavf: support flow mark in SSE path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (8 preceding siblings ...)
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 09/12] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-03-16  7:46 ` Leyi Rong
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:46 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.
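
One design note: SSE has no scatter store, so while the match flags stay
in SIMD registers, the 32-bit mark value itself has to be extracted lane
by lane and stored scalar per mbuf (the loop below mirrors the code this
patch adds):

	for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
		fdir_id_extr = _mm_extract_epi32(fdir_id0_3, i);
		if (fdir_id_extr != FDID_MIS_MAGIC)
			rx_pkts[i]->hash.fdir.hi = fdir_id_extr;
	}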

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 48 +++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..eb784938f 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* this XOR op inverts (bitwise NOT) the fdir_mask */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,32 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->vsi->adapter->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf if FDIR match */
+		uint32_t fdir_id_extr, i;
+
+		for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
+			fdir_id_extr = _mm_extract_epi32(fdir_id0_3, i);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i]->hash.fdir.hi = fdir_id_extr;
+		}
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +649,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 11/12] net/iavf: add RSS hash parsing in AVX path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (9 preceding siblings ...)
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 10/12] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-03-16  7:46 ` Leyi Rong
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:46 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.
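
A note on why the shift-then-mask positions the hash correctly. The
dword layout is assumed from the comms/OVS flex profile, and
desc_half_hi is an illustrative name for the upper 64 bits of one
loaded 128-bit half:

	/* the 16B load starting at status_error1 places the 32-bit RSS
	 * hash in dword 2 of each 128-bit half; the mbuf wants it in
	 * dword 3 (octets 12~15 of rx_descriptor_fields1, i.e. hash.rss)
	 */
	uint64_t qw_hi = rte_le_to_cpu_64(desc_half_hi); /* dw2 | dw3 */
	uint32_t rss   = (uint32_t)qw_hi;                /* dw2 = rss_hash */
	uint64_t moved = (uint64_t)rss << 32;            /* now in dw3 */
	/* slli_epi64 does this in all lanes at once, the mask zeroes
	 * dwords 0-2, and the OR drops the hash into the bytes that
	 * shuf_msk left zeroed ("rss hash parsed separately")
	 */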

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 9bbf75632..efecdd714 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -632,7 +632,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -641,7 +641,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -938,6 +938,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr;
 		} /* if() on fdir_enabled */
 
+		/**
+		 * RSS hash parsing needs to load the 2nd 16B of each desc;
+		 * entering this branch costs extra loads and hurts performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before mask
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH 12/12] net/iavf: add RSS hash parsing in SSE path
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (10 preceding siblings ...)
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-03-16  7:46 ` Leyi Rong
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-16  7:46 UTC (permalink / raw)
  To: qi.z.zhang, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index eb784938f..a615f22e2 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -650,7 +650,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -741,7 +741,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -787,8 +787,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -798,12 +802,66 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+		/**
+		 * RSS hash parsing needs to load the 2nd 16B of each desc;
+		 * entering this branch costs extra loads and hurts performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before mask
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -811,14 +869,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -842,9 +896,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread
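
In scalar terms, the shift/mask/or sequence in the vector code above
is the vector form of a simple per-packet assignment; the normal-path
patch in this series does the equivalent with (sketch, names taken
from that patch):

	if (stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S)) {
		mb->ol_flags |= PKT_RX_RSS_HASH;
		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
	}

The shuffle layout places hash.rss in the highest 32 bits of the
16-byte block written to each mbuf's rx_descriptor_fields1, which is
why each descriptor's hash is shifted to the top lane before the
final OR.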

* Re: [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-03-26  6:50   ` Wu, Jingjing
  0 siblings, 0 replies; 80+ messages in thread
From: Wu, Jingjing @ 2020-03-26  6:50 UTC (permalink / raw)
  To: Rong, Leyi, Zhang, Qi Z, Ye, Xiaolong; +Cc: dev, Rong, Leyi

One general comment:
Looks like there is code exactly the same as in the legacy Rx path, such as the logic to update the tail and the multi-segment loop.
It would be good if all the common code could be wrapped into helpers and reused by the multiple receive functions; a minimal sketch follows.
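
A minimal sketch of the kind of helper meant here; v2 of this series
extracts iavf_update_rx_tail() along these lines (body simplified,
logging omitted):

static inline void
iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold,
		    uint16_t rx_id)
{
	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		/* keep RDT one behind the last processed descriptor so
		 * RDT never equals RDH (a "full" ring to the hardware)
		 */
		rx_id = (uint16_t)((rx_id == 0) ?
			(rxq->nb_rx_desc - 1) : (rx_id - 1));
		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
}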

[...]

+/* Get the number of used descriptors of a rx queue for flexible RXD */
+uint32_t
+iavf_dev_rxq_count_flex_rxd(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+#define IAVF_RXQ_SCAN_INTERVAL 4
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	uint16_t desc = 0;
+
+	rxq = dev->data->rx_queues[queue_id];
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	while ((desc < rxq->nb_rx_desc) &&
+	       rte_le_to_cpu_16(rxdp->wb.status_error0) &
+	       (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)) {
+		/* Check the DD bit of a rx descriptor of each 4 in a group,
+		 * to avoid checking too frequently and downgrading performance
+		 * too much.
+		 */
+		desc += IAVF_RXQ_SCAN_INTERVAL;
+		rxdp += IAVF_RXQ_SCAN_INTERVAL;
+		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
+			rxdp = (volatile union iavf_rx_flex_desc *)
+				&(rxq->rx_ring[rxq->rx_tail +
+					desc - rxq->nb_rx_desc]);
+	}
+
+	return desc;
+}

Not much difference from iavf_dev_rxq_count. Why do we need a new one? The DD bit is located in the same place, right?
Can we merge those two functions into one? See the sketch below for why the legacy loop already covers both layouts.
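
A minimal sketch of why one function can serve both layouts (macro
names assumed from the existing legacy definitions): the DD flag is
bit 0 of the qword at byte offset 8 in both the legacy and the
flexible write-back formats, so the legacy scan condition also works
on a flex-descriptor ring:

	/* same DD test regardless of the write-back format in use;
	 * wrap-around handling omitted for brevity
	 */
	while (desc < rxq->nb_rx_desc &&
	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
	       (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) {
		desc += IAVF_RXQ_SCAN_INTERVAL;
		rxdp += IAVF_RXQ_SCAN_INTERVAL;
	}
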
+
 /* Get the number of used descriptors of a rx queue */
 uint32_t
 iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
@@ -1795,6 +2264,10 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		return iavf_dev_rxq_count_flex_rxd(dev, queue_id);
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
@@ -1813,6 +2286,31 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 	return desc;
 }
 
+int
+iavf_dev_rx_desc_status_flex_rxd(void *rx_queue, uint16_t offset)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint32_t desc;
+
+	if (unlikely(offset >= rxq->nb_rx_desc))
+		return -EINVAL;
+
+	if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
+		return RTE_ETH_RX_DESC_UNAVAIL;
+
+	desc = rxq->rx_tail + offset;
+	if (desc >= rxq->nb_rx_desc)
+		desc -= rxq->nb_rx_desc;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[desc];
+	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))
+		return RTE_ETH_RX_DESC_DONE;
+
+	return RTE_ETH_RX_DESC_AVAIL;
+}
+

Similar comments as above.

[......] 

  {
@@ -569,6 +596,20 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				rxq[i]->rxdid = IAVF_RXDID_COMMS_OVS_1;
Because this function is used to construct the virtchnl message for configuring queues,
the rxdid in rxq[i] should be set before this function is called. How about moving this assignment to iavf_dev_rx_queue_setup()?
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				rxq[i]->rxdid = IAVF_RXDID_LEGACY_1;
Same as above.

+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
 		}
 	}
 
--
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value
  2020-03-16  7:45 ` [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-03-26  6:57   ` Wu, Jingjing
  0 siblings, 0 replies; 80+ messages in thread
From: Wu, Jingjing @ 2020-03-26  6:57 UTC (permalink / raw)
  To: Rong, Leyi, Zhang, Qi Z, Ye, Xiaolong; +Cc: dev, Rong, Leyi



-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Leyi Rong
Sent: Monday, March 16, 2020 3:46 PM
To: Zhang, Qi Z <qi.z.zhang@intel.com>; Ye, Xiaolong <xiaolong.ye@intel.com>
Cc: dev@dpdk.org; Rong, Leyi <leyi.rong@intel.com>
Subject: [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value

This commit adds an fdir_enabled flag to the iavf_adapter structure to identify whether the FDIR ID is active. The Rx data path can benefit when FDIR ID parsing is not needed, especially in the vector path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 4fe15237a..1918a67f1 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -142,6 +142,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint8_t fdir_enabled;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index c85207dae..5548d1adb 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -281,6 +281,32 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+/* Enable/disable flow director Rx processing in data path. */
+static inline void
+iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	static uint32_t ref_cnt;
+
+	if (on) {
+		/* enable flow director processing */
+		if (ref_cnt++ == 0) {
+			ad->fdir_enabled = on;
+			PMD_DRV_LOG(DEBUG,
+				    "FDIR processing on RX set to %d", on);
+		}
+	} else {
+		if (ref_cnt >= 1) {
+			ref_cnt--;
+
+			if (ref_cnt == 0) {
+				ad->fdir_enabled = on;
+				PMD_DRV_LOG(DEBUG,
+					    "FDIR processing on RX set to %d", on);
+			}
+		}
+	}
+}
+

fdir_enabled is used in the fast path. To avoid contention, how about making it per-queue granularity? A minimal sketch of such a variant follows.
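
A hypothetical per-queue variant (the fdir_enabled/fdir_ref_cnt
fields on struct iavf_rx_queue are assumptions for illustration,
not part of this patch):

	/* sketch: keep the flag and its refcount per Rx queue so flow
	 * create/destroy on different queues do not contend on a
	 * single adapter-wide counter.
	 */
	static inline void
	iavf_fdir_rx_proc_enable(struct iavf_rx_queue *rxq, bool on)
	{
		if (on) {
			if (rxq->fdir_ref_cnt++ == 0)
				rxq->fdir_enabled = true;
		} else if (rxq->fdir_ref_cnt > 0) {
			if (--rxq->fdir_ref_cnt == 0)
				rxq->fdir_enabled = false;
		}
	}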


 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
--
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (11 preceding siblings ...)
  2020-03-16  7:46 ` [dpdk-dev] [PATCH 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
@ 2020-03-31  8:02 ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 01/12] net/iavf: flexible Rx descriptor definitions Leyi Rong
                     ` (11 more replies)
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                   ` (3 subsequent siblings)
  16 siblings, 12 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enables the framework for advanced iAVF, including
querying DDP pkg info, flexible descriptor support, FDIR mark
id and RSS hash support.

---
v2:
- Revert the deletion of macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC as
  it's defined in the AVF spec.
- Fix the RTE_LIBRTE_ICE_16BYTE_RX_DESC typo in iavf_rxtx.c.
- Move flex desc definitions into iavf_rxtx.h.
- Update to match the latest version of virtchnl.h.
- Extract a new internal helper iavf_update_rx_tail() for the Rx
  paths to call.
- Remove
  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
  as the legacy counterparts can deal with the flex desc cases.
- Move the rxq->rxdid assignment from iavf_configure_queues()
  to iavf_dev_rx_queue_setup().
- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix a build error
  when using GCC compile option -O0 (see the sketch after this
  list).
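
Why -O0 matters for the last item: _mm_extract_epi32() requires an
immediate (compile-time constant) lane index, and without
optimization the extraction loop is not unrolled, so the index stays
a variable and the build fails. A minimal sketch (fdir_id0_3 assumed
to hold four 32-bit FDIR IDs):

	uint32_t id[4];

	/* fails at -O0: `i` is not an immediate */
	for (int i = 0; i < 4; i++)
		id[i] = _mm_extract_epi32(fdir_id0_3, i);

	/* unfolded form builds at any optimization level */
	id[0] = _mm_extract_epi32(fdir_id0_3, 0);
	id[1] = _mm_extract_epi32(fdir_id0_3, 1);
	id[2] = _mm_extract_epi32(fdir_id0_3, 2);
	id[3] = _mm_extract_epi32(fdir_id0_3, 3);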

Leyi Rong (12):
  net/iavf: flexible Rx descriptor definitions
  net/iavf: return error if opcode is mismatched
  net/iavf: support to query DDP package info
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 drivers/net/iavf/iavf.h               |  15 +
 drivers/net/iavf/iavf_ethdev.c        |  15 +
 drivers/net/iavf/iavf_rxtx.c          | 542 ++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx.h          | 245 +++++++++
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 710 +++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 524 +++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  85 ++-
 7 files changed, 2087 insertions(+), 49 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 01/12] net/iavf: flexible Rx descriptor definitions
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add definitions for flexible Rx descriptor structures and macros.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.h | 200 +++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..5e309631e 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -157,6 +157,206 @@ union iavf_tx_offload {
 	};
 };
 
+/* Rx Flex Descriptors
+ * These descriptors are used instead of the legacy version descriptors
+ */
+union iavf_16b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+	} wb; /* writeback */
+};
+
+union iavf_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 16-21
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Flow ID upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 22-23 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms_ovs {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 flow_id;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 rss_hash;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum iavf_rxdid {
+	IAVF_RXDID_LEGACY_0		= 0,
+	IAVF_RXDID_LEGACY_1		= 1,
+	IAVF_RXDID_FLEX_NIC		= 2,
+	IAVF_RXDID_FLEX_NIC_2		= 6,
+	IAVF_RXDID_HW			= 7,
+	IAVF_RXDID_COMMS_GENERIC	= 16,
+	IAVF_RXDID_COMMS_AUX_VLAN	= 17,
+	IAVF_RXDID_COMMS_AUX_IPV4	= 18,
+	IAVF_RXDID_COMMS_AUX_IPV6	= 19,
+	IAVF_RXDID_COMMS_AUX_IPV6_FLOW	= 20,
+	IAVF_RXDID_COMMS_AUX_TCP	= 21,
+	IAVF_RXDID_COMMS_OVS_1		= 22,
+	IAVF_RXDID_COMMS_OVS_2		= 23,
+	IAVF_RXDID_LAST			= 63,
+};
+
+enum iavf_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	IAVF_RX_FLEX_DESC_STATUS0_EOF_S,
+	IAVF_RX_FLEX_DESC_STATUS0_HBO_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LPBK_S,
+	IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RXE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_CRCP_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define IAVF_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for iavf_32b_rx_flex_desc.pkt_len member */
+#define IAVF_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
 int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread
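
A minimal sketch of how these masks and status bits are consumed
when parsing a write-back descriptor (mirrors the normal-path patch
later in this series; rxd assumed to be a fetched
union iavf_rx_flex_desc):

	/* DD must be set before any other write-back field is valid */
	if (rte_le_to_cpu_16(rxd.wb.status_error0) &
	    (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)) {
		/* 14-bit packet length and 10-bit ptype from qword 0 */
		uint16_t pkt_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
				   IAVF_RX_FLX_DESC_PKT_LEN_M;
		uint16_t ptype =
			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0) &
			IAVF_RX_FLEX_DESC_PTYPE_M;
	}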

* [dpdk-dev] [PATCH v2 02/12] net/iavf: return error if opcode is mismatched
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 01/12] net/iavf: flexible Rx descriptor definitions Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 03/12] net/iavf: support to query DDP package info Leyi Rong
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Return an error when the opcode of the message read from the
adminQ does not match the pending command.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread
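
The practical effect shows up in the init-ops poll loop of
iavf_execute_vf_cmd() (simplified sketch; the retry constants are
the driver's existing MAX_TRY_TIMES/ASQ_DELAY_MS): a stray message
carrying the wrong opcode no longer terminates the poll as if it
were the awaited response.

	do {
		ret = iavf_read_msg_from_pf(adapter, args->out_size,
					    args->out_buffer);
		if (ret == IAVF_SUCCESS)
			break;
		/* IAVF_ERR_OPCODE_MISMATCH et al.: keep polling */
		rte_delay_ms(ASQ_DELAY_MS);
	} while (i++ < MAX_TRY_TIMES);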

* [dpdk-dev] [PATCH v2 03/12] net/iavf: support to query DDP package info
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 01/12] net/iavf: flexible Rx descriptor definitions Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Advanced iAVF supports querying DDP package info, including the
package version, track ID, package name, device serial number
and the list of protocols that the PF supports.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h        |  9 ++++++++
 drivers/net/iavf/iavf_ethdev.c |  7 ++++++
 drivers/net/iavf/iavf_vchnl.c  | 41 +++++++++++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 526040c6e..39c6eeec9 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,13 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+/* VF supported comms protocols 64-bits bitmap */
+#define IAVF_COMMS_PROTO_GTP            0x0000000000000001
+#define IAVF_COMMS_PROTO_PPPOE          0x0000000000000002
+#define IAVF_COMMS_PROTO_PFCP           0x0000000000000004
+#define IAVF_COMMS_PROTO_L2TPV3         0x0000000000000008
+#define IAVF_COMMS_PROTO_ESP            0x0000000000000010
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -97,6 +104,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	struct virtchnl_pkg_info pkg_info; /* package info */
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -225,6 +233,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_query_package_info(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 7a8bec9c9..f90e96fd3 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1236,6 +1236,13 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QUERY_DDP) {
+		if (iavf_query_package_info(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "iavf_query_package_info failed");
+			goto err_rss;
+		}
+	}
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..cadf658a6 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_PACKAGE_INFO:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_QUERY_DDP;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -586,6 +588,43 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 	return err;
 }
 
+int
+iavf_query_package_info(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_PACKAGE_INFO;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_PACKAGE_INFO");
+		return ret;
+	}
+
+	rte_memcpy(&vf->pkg_info, args.out_buffer,
+		   sizeof(struct virtchnl_pkg_info));
+	PMD_DRV_LOG(NOTICE, "pkg version is %d.%d.%d.%d, pkg name is %s,"
+		    " track id is %x, serial number is %02x%02x%02x%02x"
+		    "%02x%02x%02x%02x, proto_metadata is 0x%016lx\n",
+		    vf->pkg_info.pkg_ver.major, vf->pkg_info.pkg_ver.minor,
+		    vf->pkg_info.pkg_ver.update, vf->pkg_info.pkg_ver.draft,
+		    vf->pkg_info.pkg_name, vf->pkg_info.track_id,
+		    vf->pkg_info.dsn[7], vf->pkg_info.dsn[6],
+		    vf->pkg_info.dsn[5], vf->pkg_info.dsn[4],
+		    vf->pkg_info.dsn[3], vf->pkg_info.dsn[2],
+		    vf->pkg_info.dsn[1], vf->pkg_info.dsn[0],
+		    vf->pkg_info.proto_metadata);
+
+	return 0;
+}
+
 int
 iavf_config_irq_map(struct iavf_adapter *adapter)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread
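
For reference, a minimal usage sketch of the queried info (the check
itself is illustrative, not part of this patch): once pkg_info is
filled in, the comms-protocol bitmap defined above can be tested
before installing protocol-specific flow rules, e.g.

	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);

	if (vf->pkg_info.proto_metadata & IAVF_COMMS_PROTO_PFCP)
		PMD_DRV_LOG(INFO, "DDP package supports PFCP");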

* [dpdk-dev] [PATCH v2 04/12] net/iavf: flexible Rx descriptor support in normal path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (2 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 03/12] net/iavf: support to query DDP package info Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in normal
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 481 ++++++++++++++++++++++++++++++---
 drivers/net/iavf/iavf_rxtx.h   |   8 +
 drivers/net/iavf/iavf_vchnl.c  |  42 ++-
 5 files changed, 503 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 39c6eeec9..4fe15237a 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -105,6 +105,7 @@ struct iavf_info {
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
 	struct virtchnl_pkg_info pkg_info; /* package info */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -233,6 +234,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_query_package_info(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index f90e96fd3..4f916b413 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1243,6 +1243,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to do get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..a66002a25 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -346,6 +346,14 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	if (vf->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+	} else {
+		rxq->rxdid = IAVF_RXDID_LEGACY_1;
+	}
+
 	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_free_thresh;
@@ -720,6 +728,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -754,6 +776,89 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+/* Translate the rx flex descriptor status to pkt flags */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	stat_err = rte_le_to_cpu_16(desc->status_error0);
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+#endif
+}
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+/* If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register. Update the RDT with the value of the last processed RX
+ * descriptor minus 1, to guarantee that the RDT register is never
+ * equal to the RDH register, which creates a "full" ring situation
+ * from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG,
+			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -854,23 +959,256 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	}
 	rxq->rx_tail = rx_id;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the receive tail register of queue.
-	 * Update that register with the value of the last processed RX
-	 * descriptor minus 1.
-	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)((rx_id == 0) ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
 	}
-	rxq->nb_rx_hold = nb_hold;
+	rxq->rx_tail = rx_id;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
 	return nb_rx;
 }
@@ -1027,30 +1365,88 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->pkt_first_seg = first_seg;
 	rxq->pkt_last_seg = last_seg;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-	 * register. Update the RDT with the value of the last processed RX
-	 * descriptor minus 1, to guarantee that the RDT register is never
-	 * equal to the RDH register, which creates a "full" ring situtation
-	 * from the hardware point of view.
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
 	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)(rx_id == 0 ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
 	}
-	rxq->nb_rx_hold = nb_hold;
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1219,7 +1615,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1663,6 +2062,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1702,7 +2102,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1710,7 +2113,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1797,6 +2203,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 5e309631e..f33d1df41 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -62,6 +62,7 @@
 #define iavf_rx_desc iavf_16byte_rx_desc
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 #endif
 
 struct iavf_rxq_ops {
@@ -87,6 +88,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -379,9 +381,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index cadf658a6..245a1fd49 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -89,6 +89,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
 	case VIRTCHNL_OP_GET_PACKAGE_INFO:
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -340,7 +341,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 */
 
 	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
-		VIRTCHNL_VF_OFFLOAD_QUERY_DDP;
+		VIRTCHNL_VF_OFFLOAD_QUERY_DDP |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -377,6 +379,32 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_SUPPORTED_RXDIDS");
+		return ret;
+	}
+
+	vf->supported_rxdid =
+		((struct virtchnl_supported_rxdids *)args.out_buffer)->supported_rxdids;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -569,6 +597,18 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
 		}
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread
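
For context, a minimal sketch of how an application consumes what
these Rx paths now fill in (standard ethdev/mbuf API; the port and
queue ids are illustrative):

	struct rte_mbuf *pkts[32];
	uint16_t port_id = 0; /* assumed */
	uint16_t i, nb = rte_eth_rx_burst(port_id, 0, pkts, 32);

	for (i = 0; i < nb; i++) {
		if (pkts[i]->ol_flags & PKT_RX_RSS_HASH)
			printf("rss=0x%08x\n", pkts[i]->hash.rss);
		if (pkts[i]->ol_flags & PKT_RX_VLAN_STRIPPED)
			printf("vlan=%u\n", pkts[i]->vlan_tci);
		rte_pktmbuf_free(pkts[i]);
	}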

* [dpdk-dev] [PATCH v2 05/12] net/iavf: flexible Rx descriptor support in AVX path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (3 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in AVX
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++-
 3 files changed, 570 insertions(+), 10 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index a66002a25..4fa8d2358 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2083,16 +2083,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index f33d1df41..8e1db2588 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -413,9 +413,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..b23188fd3 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -11,14 +11,16 @@
 #endif
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	/* No cast of rxdp is needed for IAVF_RXDID_COMMS_OVS_1 here:
+	 * the read format (pkt_addr/hdr_addr) is laid out identically
+	 * in the legacy and flexible descriptors, so the rearm logic
+	 * below serves both formats.
+	 */
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -160,7 +162,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	 * of time to act
 	 */
 	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
-		iavf_rxq_rearm(rxq);
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
@@ -614,6 +616,465 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits.  This gives use the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by result of flag mask, shifted down 12.
+	 * If RSS(bit12)/VLAN(bit13) are set,
+	 * shuffle moves appropriate flags in place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* end up 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types: the ptype field is located in
+		 * bits 16-25 of each 128-bit lane
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types: the ptype field is located in
+		 * bits 16-25 of each 128-bit lane
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge them into one.
+		 * After the merge, the packet status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in mbuf_flags.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit, i.e. !EOP rather than EOP.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
@@ -625,6 +1086,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -690,6 +1163,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 06/12] net/iavf: flexible Rx descriptor support in SSE path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (4 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 07/12] net/iavf: add flow director enabled switch value Leyi Rong
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in SSE
path of iAVF PMD.
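
As a mental model, the shuf_msk/crc_adjust pair introduced below does,
per packet, roughly the following scalar work (a sketch for
illustration only, not part of the patch; field names approximate the
flex descriptor layout used in this series, and the status gates and
14-bit pkt_len mask of the scalar path are omitted):

	mb->pkt_len  = rte_le_to_cpu_16(rxdp->wb.pkt_len) - rxq->crc_len;
	mb->data_len = rte_le_to_cpu_16(rxdp->wb.pkt_len) - rxq->crc_len;
	mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1);   /* octets 10~11 */
	mb->hash.rss = rte_le_to_cpu_32(rxdp->wb.rss_hash); /* octets 12~15 */

The vector path performs all four assignments for four descriptors per
loop with one shuffle and one add per descriptor.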

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 4fa8d2358..d7471e600 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2087,7 +2087,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2100,7 +2100,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 8e1db2588..290dd68c1 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -406,9 +406,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bit6:4 for checksum.
+	 * bit12 for RSS indication.
+	 * bit13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flag
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts has to be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packets in one loop
+	 * [A*. mask out 4 unused dirty fields in desc]
+	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf pointers */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* A.1 load desc 1 */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* andnot with the mask extracts the bits, flipped 1<->0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 07/12] net/iavf: add flow director enabled switch value
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (5 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 08/12] net/iavf: support flow mark in normal data path Leyi Rong
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_adapter structure
to identify whether FDIR ID processing is active. The Rx data path
benefits when FDIR ID parsing can be skipped, especially in the
vector paths.
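
For illustration, the intended calling pattern looks roughly like this
(a sketch only; the caller names are hypothetical, the refcount keeps
fdir_enabled set while at least one rule is installed):

	static int example_mark_rule_create(struct iavf_adapter *ad)
	{
		iavf_fdir_rx_proc_enable(ad, true);  /* ref_cnt 0->1 enables */
		return 0;
	}

	static void example_mark_rule_destroy(struct iavf_adapter *ad)
	{
		iavf_fdir_rx_proc_enable(ad, false); /* last destroy disables */
	}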

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4fe15237a..1918a67f1 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -142,6 +142,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint8_t fdir_enabled;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 290dd68c1..b4f4c5131 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -490,6 +490,32 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	static uint32_t ref_cnt;
+
+	if (on) {
+		/* enable flow director processing */
+		if (ref_cnt++ == 0) {
+			ad->fdir_enabled = on;
+			PMD_DRV_LOG(DEBUG,
+				    "FDIR processing on RX set to %d", on);
+		}
+	} else {
+		if (ref_cnt >= 1) {
+			ref_cnt--;
+
+			if (ref_cnt == 0) {
+				ad->fdir_enabled = on;
+				PMD_DRV_LOG(DEBUG,
+					    "FDIR processing on RX set to %d", on);
+			}
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 08/12] net/iavf: support flow mark in normal data path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (6 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 07/12] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 09/12] net/iavf: support flow mark in AVX path Leyi Rong
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.
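
On the application side, the mark propagated by this patch is consumed
roughly as below (illustrative sketch, not part of the patch; assumes
port and queue are already set up):

	struct rte_mbuf *pkts[32];
	uint16_t k, nb = rte_eth_rx_burst(port, queue, pkts, 32);

	for (k = 0; k < nb; k++) {
		if (pkts[k]->ol_flags & PKT_RX_FDIR_ID) {
			/* hash.fdir.hi holds the rte_flow MARK id */
			uint32_t mark = pkts[k]->hash.fdir.hi;
			(void)mark;
		}
	}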

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 37 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 1918a67f1..533856f21 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -74,6 +74,9 @@
 #define IAVF_COMMS_PROTO_L2TPV3         0x0000000000000008
 #define IAVF_COMMS_PROTO_ESP            0x0000000000000010
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index d7471e600..848d28333 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -756,6 +756,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -776,6 +780,25 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+
+	return flags;
+}
+
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -792,6 +815,11 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 #endif
+
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
 }
 
 #define IAVF_RX_FLEX_ERR0_BITS	\
@@ -953,6 +981,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1351,6 +1382,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1517,6 +1551,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 09/12] net/iavf: support flow mark in AVX path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (7 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 08/12] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 10/12] net/iavf: support flow mark in SSE path Leyi Rong
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.
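
The helper added below is the SIMD form of this per-descriptor scalar
check, applied to eight descriptors at once (scalar sketch for
illustration only):

	static inline uint64_t
	fdir_flags_scalar(uint32_t flow_id)
	{
		return (flow_id != 0xFFFFFFFF) ?
			(PKT_RX_FDIR | PKT_RX_FDIR_ID) : 0;
	}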

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 82 +++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index b23188fd3..340bf5556 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -616,6 +616,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* this XOR op inverts fdir_mask, so all-ones now marks a match */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -678,8 +697,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -687,8 +706,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -930,8 +949,61 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->vsi->adapter->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			uint32_t fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 3);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 0]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 7);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 1]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 2);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 2]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 6);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 3]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 1);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 4]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 5);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 5]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 0);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 6]->hash.fdir.hi = fdir_id_extr;
+
+			fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 4);
+			if (fdir_id_extr != FDID_MIS_MAGIC)
+				rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr;
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 10/12] net/iavf: support flow mark in SSE path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (8 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 09/12] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.
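
The unpack sequence added below gathers the flow_id dword (octets
12~15) of the four descriptors into one register; given the descs[4]
array loaded in the Rx loop, an illustrative scalar equivalent (not
part of the patch):

	uint32_t fdir_id[4];
	int j;

	for (j = 0; j < 4; j++)	/* dword 3 of each desc is flow_id */
		fdir_id[j] = _mm_extract_epi32(descs[j], 3);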

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 58 +++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..801a81620 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* this XOR op inverts fdir_mask, so all-ones now marks a match */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,42 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->vsi->adapter->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf if FDIR match */
+		uint32_t fdir_id_extr;
+
+		fdir_id_extr = _mm_extract_epi32(fdir_id0_3, 0);
+		if (fdir_id_extr != FDID_MIS_MAGIC)
+			rx_pkts[0]->hash.fdir.hi = fdir_id_extr;
+
+		fdir_id_extr = _mm_extract_epi32(fdir_id0_3, 1);
+		if (fdir_id_extr != FDID_MIS_MAGIC)
+			rx_pkts[1]->hash.fdir.hi = fdir_id_extr;
+
+		fdir_id_extr = _mm_extract_epi32(fdir_id0_3, 2);
+		if (fdir_id_extr != FDID_MIS_MAGIC)
+			rx_pkts[2]->hash.fdir.hi = fdir_id_extr;
+
+		fdir_id_extr = _mm_extract_epi32(fdir_id0_3, 3);
+		if (fdir_id_extr != FDID_MIS_MAGIC)
+			rx_pkts[3]->hash.fdir.hi = fdir_id_extr;
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +659,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 11/12] net/iavf: add RSS hash parsing in AVX path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (9 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 10/12] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.
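
In scalar terms the merge below amounts to no more than the following,
gated on the RSS offload flag (a sketch; mb, desc and offloads stand
for one mbuf, its descriptor viewed through the same layout the normal
path uses, and the configured Rx offloads):

	if (offloads & DEV_RX_OFFLOAD_RSS_HASH)
		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);

The vector path gets there by shifting the hash into the top dword of
each 128-bit lane and OR-ing it into the already-shuffled mbuf data.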

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 340bf5556..b2b0406da 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -1004,6 +1004,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr;
 		} /* if() on fdir_enabled */
 
+		/**
+		 * RSS hash parsing needs to load the 2nd 16B of each desc;
+		 * taking this branch costs extra loads and some performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before masking
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v2 12/12] net/iavf: add RSS hash parsing in SSE path
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
                     ` (10 preceding siblings ...)
  2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-03-31  8:02   ` Leyi Rong
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-03-31  8:02 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.
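
The extra 16B descriptor loads below only run when the application
requested the hash at configure time, e.g. (illustrative only):

	struct rte_eth_conf conf = { 0 };

	conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);

Otherwise the SSE loop keeps its original load pattern and cost.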

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 801a81620..5ae640743 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -660,7 +660,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -751,7 +751,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -797,8 +797,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -808,12 +812,66 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+		/**
+		 * Need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * taking this branch costs extra loads and hurts performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before mask
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -821,14 +879,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -852,9 +906,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (12 preceding siblings ...)
  2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-08  6:21 ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
                     ` (10 more replies)
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                   ` (2 subsequent siblings)
  16 siblings, 11 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:21 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enables the framework for advanced iAVF, including
flexible descriptor support, FDIR mark ID and RSS hash support.

---
v3:
- Remove the patch to query DDP package info as it's specific to DCF.
- Make fdir_ref_cnt a per-VF value for enabling/disabling FDIR ID parsing.
- Move the fdir_enabled flag to a per-queue value for better cache
  behavior when accessed in vector routines.
- Store the extracted flow_id value to the mbuf unconditionally to avoid
  branch misprediction stalls, which benefits performance (see the sketch
  after the v2 notes below).

v2:
- Revert the deletion of macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC as
  it's defined in the AVF spec.
- Fix the RTE_LIBRTE_ICE_16BYTE_RX_DESC typo in iavf_rxtx.c.
- Move the flex desc definitions into iavf_rxtx.h.
- Update to match the latest version of virtchnl.h.
- Extract a new internal function iavf_update_rx_tail for callers to use.
- Remove
  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
  as the accompanying legacy ones can handle the flex desc cases.
- Move the rxq->rxdid assignment from iavf_configure_queues()
  to iavf_dev_rx_queue_setup().
- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix a build error
  when using GCC compile option -O0.
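
For reference, a minimal sketch of the "store unconditionally" idea from
the v3 notes above (illustrative only; desc stands for any write-back
flex descriptor carrying a flow_id field, as defined in this series):

	/* Always copy flow_id into the mbuf; only the ol_flags update
	 * depends on the descriptor status, so the store itself can
	 * never suffer a mispredicted branch.
	 */
	mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);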


Leyi Rong (11):
  net/iavf: flexible Rx descriptor definitions
  net/iavf: return error if opcode is mismatched
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 drivers/net/iavf/iavf.h               |   6 +
 drivers/net/iavf/iavf_ethdev.c        |   8 +
 drivers/net/iavf/iavf_rxtx.c          | 540 ++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx.h          | 248 +++++++++
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 700 +++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 518 +++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  46 +-
 7 files changed, 2017 insertions(+), 49 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 01/11] net/iavf: flexible Rx descriptor definitions
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
                     ` (9 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add definitions for flexible Rx descriptor structures and macros.
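
As a quick orientation (an illustrative snippet, not part of the patch),
the masks added below are meant to be applied to the little-endian
write-back fields like so:

	/* ptype lives in bits [9:0] of ptype_flex_flags0 and the packet
	 * length in bits [13:0] of pkt_len; rxd is a received
	 * union iavf_rx_flex_desc in write-back format.
	 */
	uint16_t ptype = rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0) &
			 IAVF_RX_FLEX_DESC_PTYPE_M;
	uint16_t pkt_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
			   IAVF_RX_FLX_DESC_PKT_LEN_M;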

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.h | 200 +++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..5e309631e 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -157,6 +157,206 @@ union iavf_tx_offload {
 	};
 };
 
+/* Rx Flex Descriptors
+ * These descriptors are used instead of the legacy version descriptors
+ */
+union iavf_16b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+	} wb; /* writeback */
+};
+
+union iavf_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 16-21
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Flow ID upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 22-23 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms_ovs {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 flow_id;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 rss_hash;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with specific metadata (profile 7 is reserved for HW)
+ */
+enum iavf_rxdid {
+	IAVF_RXDID_LEGACY_0		= 0,
+	IAVF_RXDID_LEGACY_1		= 1,
+	IAVF_RXDID_FLEX_NIC		= 2,
+	IAVF_RXDID_FLEX_NIC_2		= 6,
+	IAVF_RXDID_HW			= 7,
+	IAVF_RXDID_COMMS_GENERIC	= 16,
+	IAVF_RXDID_COMMS_AUX_VLAN	= 17,
+	IAVF_RXDID_COMMS_AUX_IPV4	= 18,
+	IAVF_RXDID_COMMS_AUX_IPV6	= 19,
+	IAVF_RXDID_COMMS_AUX_IPV6_FLOW	= 20,
+	IAVF_RXDID_COMMS_AUX_TCP	= 21,
+	IAVF_RXDID_COMMS_OVS_1		= 22,
+	IAVF_RXDID_COMMS_OVS_2		= 23,
+	IAVF_RXDID_LAST			= 63,
+};
+
+enum iavf_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	IAVF_RX_FLEX_DESC_STATUS0_EOF_S,
+	IAVF_RX_FLEX_DESC_STATUS0_HBO_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LPBK_S,
+	IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RXE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_CRCP_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define IAVF_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for iavf_32b_rx_flex_desc.pkt_len member */
+#define IAVF_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
 int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 02/11] net/iavf: return error if opcode is mismatched
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                     ` (8 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add an error return when the opcode of the message read
from the adminQ does not match the pending command.
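
A condensed sketch of the polling pattern this return value enables
(shape borrowed from the existing iavf_execute_vf_cmd loop; the retry
bounds shown are illustrative):

	do {
		/* IAVF_ERR_OPCODE_MISMATCH means the message belongs to
		 * another command, so keep polling rather than treating
		 * it as this command's response.
		 */
		ret = iavf_read_msg_from_pf(adapter, args->out_size,
					    args->out_buffer);
		if (ret == IAVF_SUCCESS)
			break;
		rte_delay_ms(ASQ_DELAY_MS);
	} while (i++ < MAX_TRY_TIMES);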

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 03/11] net/iavf: flexible Rx descriptor support in normal path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                     ` (7 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in normal
path of iAVF PMD.
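
The queue-setup change below boils down to one RXDID negotiation per Rx
queue, condensed here for illustration with the same names the patch
uses:

	/* Prefer the flexible comms/OVS profile when both the flex
	 * descriptor capability and that RXDID are supported by the
	 * host; otherwise fall back to the 32B legacy descriptor.
	 */
	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1))
		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
	else
		rxq->rxdid = IAVF_RXDID_LEGACY_1;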

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 479 ++++++++++++++++++++++++++++++---
 drivers/net/iavf/iavf_rxtx.h   |   8 +
 drivers/net/iavf/iavf_vchnl.c  |  42 ++-
 5 files changed, 501 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 526040c6e..67d625053 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -97,6 +97,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -225,6 +226,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 7a8bec9c9..32df64121 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1236,6 +1236,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to do get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..67297dcb7 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -346,6 +346,14 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	if (vf->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+	} else {
+		rxq->rxdid = IAVF_RXDID_LEGACY_1;
+	}
+
 	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_free_thresh;
@@ -720,6 +728,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_64(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -754,6 +776,87 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+/* Translate the rx flex descriptor fields (e.g. RSS hash) to mbuf fields */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	stat_err = rte_le_to_cpu_16(desc->status_error0);
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+#endif
+}
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+/* If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register. Update the RDT with the value of the last processed RX
+ * descriptor minus 1, to guarantee that the RDT register is never
+ * equal to the RDH register, which creates a "full" ring situation
+ * from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG,
+			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -854,23 +957,256 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	}
 	rxq->rx_tail = rx_id;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the receive tail register of queue.
-	 * Update that register with the value of the last processed RX
-	 * descriptor minus 1.
-	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)((rx_id == 0) ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
 	}
-	rxq->nb_rx_hold = nb_hold;
+	rxq->rx_tail = rx_id;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
 	return nb_rx;
 }
@@ -1027,30 +1363,88 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->pkt_first_seg = first_seg;
 	rxq->pkt_last_seg = last_seg;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-	 * register. Update the RDT with the value of the last processed RX
-	 * descriptor minus 1, to guarantee that the RDT register is never
-	 * equal to the RDH register, which creates a "full" ring situtation
-	 * from the hardware point of view.
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
 	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)(rx_id == 0 ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
 	}
-	rxq->nb_rx_hold = nb_hold;
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1219,7 +1613,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1663,6 +2060,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1702,7 +2100,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1710,7 +2111,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1797,6 +2201,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 5e309631e..f33d1df41 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -62,6 +62,7 @@
 #define iavf_rx_desc iavf_16byte_rx_desc
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 #endif
 
 struct iavf_rxq_ops {
@@ -87,6 +88,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -379,9 +381,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..3f0d23a92 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -375,6 +377,32 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_SUPPORTED_RXDIDS");
+		return ret;
+	}
+
+	vf->supported_rxdid =
+		((struct virtchnl_supported_rxdids *)args.out_buffer)->supported_rxdids;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -567,6 +595,18 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
 		}
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 04/11] net/iavf: flexible Rx descriptor support in AVX path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (2 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                     ` (6 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in AVX
path of iAVF PMD.
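
A short sketch of the descriptor-pairing idea the AVX routine below is
built on (illustrative; the real loop reads eight descriptors per
iteration, highest index first, and applies the full field shuffles):

	/* Each 32B flex write-back is read as its first 16B half; two
	 * halves are merged into one 256-bit register so one byte
	 * shuffle can convert two descriptors to mbuf layout at once.
	 */
	const __m128i d1 = _mm_load_si128((void *)(rxdp + 1));
	rte_compiler_barrier();
	const __m128i d0 = _mm_load_si128((void *)(rxdp + 0));
	__m256i d0_1 = _mm256_inserti128_si256
			(_mm256_castsi128_si256(d0), d1, 1);
	__m256i mb0_1 = _mm256_shuffle_epi8(d0_1, shuf_msk);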

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++-
 3 files changed, 570 insertions(+), 10 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 67297dcb7..34c41d104 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2081,16 +2081,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index f33d1df41..8e1db2588 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -413,9 +413,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..b23188fd3 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -11,14 +11,16 @@
 #endif
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	/* No cast of rxdp is needed for IAVF_RXDID_COMMS_OVS_1: the read
+	 * format (pkt_addr/hdr_addr, DD bit in hdr_addr) is shared by the
+	 * legacy and flexible descriptors, so the rearm writes below are
+	 * identical for both layouts. */
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -160,7 +162,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	 * of time to act
 	 */
 	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
-		iavf_rxq_rearm(rxq);
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
@@ -614,6 +616,465 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		/* rearm from the current position in the descriptor ring */
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits.  This gives us the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by the result of the flags mask, shifted
+	 * down by 12 bits. If RSS(bit12)/VLAN(bit13) are set, the
+	 * shuffle moves the appropriate flags into place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* upper 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge to one
+		 * After merge, the packets status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in vlan0.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit, i.e. !EOP rather than EOP.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
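
A scalar model of the tail update at the end of the receive loop above
(a minimal sketch; the helper name and its arguments are invented for
illustration only):

	/* advance the software tail by the number of received packets,
	 * wrap it, and keep it even so that the 32B descriptor loads in
	 * the AVX2 loop stay aligned; the odd packet is left for the
	 * next call
	 */
	static uint16_t
	update_tail_model(uint16_t *rx_tail, uint16_t nb_desc,
			  uint16_t received)
	{
		*rx_tail = (*rx_tail + received) & (nb_desc - 1);
		if ((*rx_tail & 1) == 1 && received > 1) {
			(*rx_tail)--;
			received--;
		}
		return received;
	}
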
@@ -625,6 +1086,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -690,6 +1163,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles a single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 05/11] net/iavf: flexible Rx descriptor support in SSE path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (3 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 06/11] net/iavf: add flow director enabled switch value Leyi Rong
                     ` (5 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in SSE
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 34c41d104..09ce5e3f3 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2085,7 +2085,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2098,7 +2098,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 8e1db2588..290dd68c1 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -406,9 +406,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bits 6:4 for checksum.
+	 * bit 12 for RSS indication.
+	 * bit 13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flags
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts shall be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packet in one loop
+	 * [A*. mask out 4 unused dirty field in desc]
+	 * B. copy 4 mbuf pointers from swring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf pointers */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* B.1 load 2 mbuf pointers */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf pointers into rx_pkts  */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
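
The shuffle/add pair in the loop above amounts to this per-packet
scalar conversion (a sketch only; the helper name is invented, and
packet_type is filled in afterwards by flex_desc_to_ptype_v()):

	static void
	flex_desc_to_mbuf_scalar(const volatile union iavf_rx_flex_desc *rxdp,
				 struct rte_mbuf *m, uint8_t crc_len)
	{
		uint16_t len = rte_le_to_cpu_16(rxdp->wb.pkt_len) - crc_len;

		m->pkt_len = len;	/* shuf_msk zeroes the high 16 bits */
		m->data_len = len;
		m->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1);
	}
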
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 06/11] net/iavf: add flow director enabled switch value
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (4 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 07/11] net/iavf: support flow mark in normal data path Leyi Rong
                     ` (4 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_rx_queue structure
to identify whether FDIR ID parsing is active. The Rx data path
benefits when FDIR ID parsing can be skipped, especially in the
vector paths.
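
As a usage sketch, a hypothetical caller of the ref-counted switch
added below (the flow create/destroy hooks shown here are invented for
illustration and are not part of this patch):

	/* each FDIR rule takes one reference; Rx parsing is switched on
	 * by the first rule and off again when the last one is removed
	 */
	static int
	example_fdir_rule_create(struct iavf_adapter *ad)
	{
		/* ... program the rule via virtchnl (omitted) ... */
		iavf_fdir_rx_proc_enable(ad, true);	/* ref 0 -> 1 enables */
		return 0;
	}

	static void
	example_fdir_rule_destroy(struct iavf_adapter *ad)
	{
		/* ... remove the rule (omitted) ... */
		iavf_fdir_rx_proc_enable(ad, false);	/* ref 1 -> 0 disables */
	}
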

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 67d625053..0cd0117c2 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -134,6 +134,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint16_t fdir_ref_cnt;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 290dd68c1..46ffbb738 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -103,6 +103,7 @@ struct iavf_rx_queue {
 
 	uint16_t port_id;        /* device port ID */
 	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
+	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
 	uint16_t queue_id;      /* Rx queue index */
 	uint16_t rx_buf_len;    /* The packet buffer size */
 	uint16_t rx_hdr_len;    /* The header buffer size */
@@ -490,6 +491,34 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+#define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
+	for (int i = 0; i < ad->eth_dev->data->nb_rx_queues; i++) { \
+		struct iavf_rx_queue *rxq = ad->eth_dev->data->rx_queues[i]; \
+		if (!rxq) \
+			continue; \
+		rxq->fdir_enabled = on; \
+	} \
+	PMD_DRV_LOG(DEBUG, "FDIR processing on RX set to %d", on); \
+} while (0)
+
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	if (on) {
+		/* enable flow director processing */
+		if (ad->fdir_ref_cnt++ == 0)
+			FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+	} else {
+		if (ad->fdir_ref_cnt >= 1) {
+			ad->fdir_ref_cnt--;
+
+			if (ad->fdir_ref_cnt == 0)
+				FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 07/11] net/iavf: support flow mark in normal data path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (5 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 06/11] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 08/11] net/iavf: support flow mark in AVX path Leyi Rong
                     ` (3 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 37 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 0cd0117c2..b63efd4e8 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,9 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 09ce5e3f3..725dd9e45 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -756,6 +756,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -776,6 +780,25 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+
+	return flags;
+}
+
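
On the application side, the mark is then consumed from the mbuf like
this (a usage sketch; m is a received mbuf and handle_mark() is a
hypothetical application callback):

	if (m->ol_flags & PKT_RX_FDIR) {
		/* PKT_RX_FDIR_ID means hash.fdir.hi holds the mark ID */
		if (m->ol_flags & PKT_RX_FDIR_ID)
			handle_mark(m->hash.fdir.hi);
	}
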
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -792,6 +815,11 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 #endif
+
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
 }
 
 #define IAVF_RX_FLEX_ERR0_BITS	\
@@ -951,6 +979,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1349,6 +1380,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1515,6 +1549,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 08/11] net/iavf: support flow mark in AVX path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (6 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 07/11] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 09/11] net/iavf: support flow mark in SSE path Leyi Rong
                     ` (2 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 72 +++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index b23188fd3..3bf5833fa 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -616,6 +616,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* XOR with all-ones inverts the fdir_mask (bitwise NOT) */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
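
The cmpeq/xor/and sequence above is the lane-parallel form of the
following scalar logic (a sketch for illustration only):

	static uint32_t
	fdir_flags_scalar(uint32_t flow_id)
	{
		/* all-ones when the filter missed */
		uint32_t miss = (flow_id == FDID_MIS_MAGIC) ? UINT32_MAX : 0;
		/* XOR with all-ones inverts the miss mask: hits keep flags */
		uint32_t hit = miss ^ UINT32_MAX;

		return hit & (PKT_RX_FDIR | PKT_RX_FDIR_ID);
	}
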
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -678,8 +697,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -687,8 +706,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -930,8 +949,51 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			rx_pkts[i + 0]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 3);
+
+			rx_pkts[i + 1]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 7);
+
+			rx_pkts[i + 2]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 2);
+
+			rx_pkts[i + 3]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 6);
+
+			rx_pkts[i + 4]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 1);
+
+			rx_pkts[i + 5]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 5);
+
+			rx_pkts[i + 6]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 0);
+
+			rx_pkts[i + 7]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 4);
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 09/11] net/iavf: support flow mark in SSE path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (7 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 08/11] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 52 +++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..d7d840853 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* XOR with all-ones inverts the fdir_mask (bitwise NOT) */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,36 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf */
+		rx_pkts[0]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 0);
+
+		rx_pkts[1]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 1);
+
+		rx_pkts[2]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 2);
+
+		rx_pkts[3]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 3);
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +653,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 10/11] net/iavf: add RSS hash parsing in AVX path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (8 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 09/11] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 3bf5833fa..22f1b7887 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
+		/**
+		 * needs to load the 2nd 16B of each desc for RSS hash parsing,
+		 * so entering this branch causes a performance drop.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * to shift the 32b RSS hash value into the
+			 * highest 32b of each 128b before masking
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+
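
The shift-and-mask above is a lane-wise form of the following scalar
step (a sketch; the helper name and its arguments are invented, with
desc_qw3 standing for the descriptor qword whose low 32 bits carry the
RSS hash in this profile):

	static uint64_t
	merge_rss_hash_scalar(uint64_t mb_qword, uint64_t desc_qw3)
	{
		/* move the hash into the high 32b, which is where hash.rss
		 * sits in the mbuf layout; the vector mask additionally
		 * zeroes the neighbouring qword lane
		 */
		return mb_qword | (desc_qw3 << 32);
	}
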
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in mbuf_flags.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v3 11/11] net/iavf: add RSS hash parsing in SSE path
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (9 preceding siblings ...)
  2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-04-08  6:22   ` Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-08  6:22 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.
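
The hash is only parsed when the application asked for it; a minimal
configuration sketch using the standard ethdev API (port_id, nb_rxq
and nb_txq are assumed to be set up elsewhere):

	struct rte_eth_conf port_conf = {0};
	int ret;

	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
	/* PKT_RX_RSS_HASH is then set and mbuf->hash.rss filled on Rx */
	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
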

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index d7d840853..85b50a7f1 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -654,7 +654,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -745,7 +745,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -791,8 +791,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -802,12 +806,66 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+		/**
+		 * needs to load the 2nd 16B of each desc for RSS hash parsing,
+		 * so entering this branch causes a performance drop.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * to shift the 32b RSS hash value into the
+			 * highest 32b of each 128b before masking
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -815,14 +873,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -846,9 +900,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (13 preceding siblings ...)
  2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-14  6:15 ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
                     ` (10 more replies)
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
  16 siblings, 11 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enables the framework for advanced iAVF, including
flexible descriptor support, FDIR mark ID and RSS hash support.

---
v4:
- Avoid initial declaration in 'for' loop to stay backward-compatible
  with older gcc versions.

v3:
- Remove patch to query DDP package info as it's specific to DCF.
- Make fdir_ref_cnt a per-VF value for enabling/disabling FDIR ID parsing.
- Move the fdir_enabled flag to a per-queue value for cache benefit when
  accessed in the vector routines.
- Store the extracted flow_id value to the mbuf unconditionally to avoid
  branch misprediction, which benefits performance.

v2:
- Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
  it's defined in AVF spec.
- Fix RTE_LIBRTE_ICE_16BYTE_RX_DESC typo in iavf_rxtx.c.
- Move flex desc definitions into iavf_rxtx.h.
- Up to date to match with the latest version of virtchnl.h.
- Extract a new internal function iavf_update_rx_tail for common use.
- Remove
  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
  as the accompanying legacy ones can deal with the flex desc cases.
- Move rxq->rxdid assignment from iavf_configure_queues()
  to iavf_dev_rx_queue_setup().
- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
  when using GCC compile option -O0.

Leyi Rong (11):
  net/iavf: flexible Rx descriptor definitions
  net/iavf: return error if opcode is mismatched
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 drivers/net/iavf/iavf.h               |   6 +
 drivers/net/iavf/iavf_ethdev.c        |   8 +
 drivers/net/iavf/iavf_rxtx.c          | 540 ++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx.h          | 249 +++++++++
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 700 +++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 518 +++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  46 +-
 7 files changed, 2018 insertions(+), 49 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 01/11] net/iavf: flexible Rx descriptor definitions
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
                     ` (9 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add definitions for flexible Rx descriptor structures and macros.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.h | 200 +++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..5e309631e 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -157,6 +157,206 @@ union iavf_tx_offload {
 	};
 };
 
+/* Rx Flex Descriptors
+ * These descriptors are used instead of the legacy version descriptors
+ */
+union iavf_16b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+	} wb; /* writeback */
+};
+
+union iavf_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 16-21
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Flow ID upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 22-23 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms_ovs {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 flow_id;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 rss_hash;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum iavf_rxdid {
+	IAVF_RXDID_LEGACY_0		= 0,
+	IAVF_RXDID_LEGACY_1		= 1,
+	IAVF_RXDID_FLEX_NIC		= 2,
+	IAVF_RXDID_FLEX_NIC_2		= 6,
+	IAVF_RXDID_HW			= 7,
+	IAVF_RXDID_COMMS_GENERIC	= 16,
+	IAVF_RXDID_COMMS_AUX_VLAN	= 17,
+	IAVF_RXDID_COMMS_AUX_IPV4	= 18,
+	IAVF_RXDID_COMMS_AUX_IPV6	= 19,
+	IAVF_RXDID_COMMS_AUX_IPV6_FLOW	= 20,
+	IAVF_RXDID_COMMS_AUX_TCP	= 21,
+	IAVF_RXDID_COMMS_OVS_1		= 22,
+	IAVF_RXDID_COMMS_OVS_2		= 23,
+	IAVF_RXDID_LAST			= 63,
+};
+
+enum iavf_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	IAVF_RX_FLEX_DESC_STATUS0_EOF_S,
+	IAVF_RX_FLEX_DESC_STATUS0_HBO_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LPBK_S,
+	IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RXE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_CRCP_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define IAVF_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for iavf_32b_rx_flex_desc.pkt_len member */
+#define IAVF_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
 int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
-- 
2.17.1
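
A minimal usage sketch of the two masks defined above, assuming the
structures from this patch (the helper below is hypothetical, not part
of the patch):

#include <rte_byteorder.h>

static inline void
iavf_parse_flex_qw0(const volatile union iavf_32b_rx_flex_desc *rxd,
		    uint16_t *ptype, uint16_t *pkt_len)
{
	/* the ptype lives in the low 10 bits of ptype_flex_flags0 */
	*ptype = rte_le_to_cpu_16(rxd->wb.ptype_flex_flags0) &
		 IAVF_RX_FLEX_DESC_PTYPE_M;
	/* the packet length lives in the low 14 bits of pkt_len */
	*pkt_len = rte_le_to_cpu_16(rxd->wb.pkt_len) &
		   IAVF_RX_FLX_DESC_PKT_LEN_M;
}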


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 02/11] net/iavf: return error if opcode is mismatched
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                     ` (8 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Return an error when the opcode of the message read from the
adminQ does not match the pending command.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1
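
The error return matters because the init-time virtchnl ops poll for a
matching response. The surrounding loop in iavf_execute_vf_cmd() follows
this pattern (a sketch, using the retry macros defined in iavf_vchnl.c):

	int i = 0, ret;

	do {
		ret = iavf_read_msg_from_pf(adapter, args->out_size,
					    args->out_buffer);
		if (ret == IAVF_SUCCESS)
			break;
		/* a mismatched opcode or no message yet: wait and retry */
		rte_delay_ms(ASQ_DELAY_MS);
	} while (i++ < MAX_TRY_TIMES);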


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 03/11] net/iavf: flexible Rx descriptor support in normal path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                     ` (7 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the normal
path of the iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 479 ++++++++++++++++++++++++++++++---
 drivers/net/iavf/iavf_rxtx.h   |   8 +
 drivers/net/iavf/iavf_vchnl.c  |  42 ++-
 5 files changed, 501 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 368ddf604..fdb31b929 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -103,6 +103,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -235,6 +236,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 9dadee3d6..4280a92bb 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1245,6 +1245,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..67297dcb7 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -346,6 +346,14 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	if (vf->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+	} else {
+		rxq->rxdid = IAVF_RXDID_LEGACY_1;
+	}
+
 	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_free_thresh;
@@ -720,6 +728,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_64(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -754,6 +776,87 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+/* Translate the rx flex descriptor fields to mbuf fields */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err = rte_le_to_cpu_16(desc->status_error0);
+
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+#endif
+}
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+/* If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register. Update the RDT with the value of the last processed RX
+ * descriptor minus 1, to guarantee that the RDT register is never
+	 * equal to the RDH register, which creates a "full" ring situation
+ * from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG,
+			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -854,23 +957,256 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	}
 	rxq->rx_tail = rx_id;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the receive tail register of queue.
-	 * Update that register with the value of the last processed RX
-	 * descriptor minus 1.
-	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)((rx_id == 0) ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
 	}
-	rxq->nb_rx_hold = nb_hold;
+	rxq->rx_tail = rx_id;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
 	return nb_rx;
 }
@@ -1027,30 +1363,88 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->pkt_first_seg = first_seg;
 	rxq->pkt_last_seg = last_seg;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-	 * register. Update the RDT with the value of the last processed RX
-	 * descriptor minus 1, to guarantee that the RDT register is never
-	 * equal to the RDH register, which creates a "full" ring situtation
-	 * from the hardware point of view.
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
 	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)(rx_id == 0 ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
 	}
-	rxq->nb_rx_hold = nb_hold;
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1219,7 +1613,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1663,6 +2060,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1702,7 +2100,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1710,7 +2111,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1797,6 +2201,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 5e309631e..f33d1df41 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -62,6 +62,7 @@
 #define iavf_rx_desc iavf_16byte_rx_desc
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 #endif
 
 struct iavf_rxq_ops {
@@ -87,6 +88,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -379,9 +381,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..3f0d23a92 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -375,6 +377,32 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_SUPPORTED_RXDIDS");
+		return ret;
+	}
+
+	vf->supported_rxdid =
+		((struct virtchnl_supported_rxdids *)args.out_buffer)->supported_rxdids;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -567,6 +595,18 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
 		}
 	}
 
-- 
2.17.1
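
A small sketch of how the PF-advertised supported_rxdid bitmap is
consulted (one bit per RXDID profile; the helper name is hypothetical):

#include <stdbool.h>
#include <stdint.h>

static inline bool
iavf_rxdid_supported(uint64_t supported_rxdid, uint8_t rxdid)
{
	return (supported_rxdid & (1ULL << rxdid)) != 0;
}

/* e.g. iavf_rxdid_supported(vf->supported_rxdid, IAVF_RXDID_COMMS_OVS_1) */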


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 04/11] net/iavf: flexible Rx descriptor support in AVX path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (2 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                     ` (6 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the AVX
path of the iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++-
 3 files changed, 570 insertions(+), 10 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 67297dcb7..34c41d104 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2081,16 +2081,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index f33d1df41..8e1db2588 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -413,9 +413,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..b23188fd3 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -11,14 +11,16 @@
 #endif
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	/* The legacy and flexible descriptors share the same read-format
+	 * layout (pkt_addr/hdr_addr is all the rearm code writes), so no
+	 * flex-specific handling is needed for IAVF_RXDID_COMMS_OVS_1 here.
+	 */
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -160,7 +162,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	 * of time to act
 	 */
 	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
-		iavf_rxq_rearm(rxq);
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
@@ -614,6 +616,465 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits.  This gives us the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by result of flag mask, shifted down 12.
+	 * If RSS(bit12)/VLAN(bit13) are set,
+	 * shuffle moves appropriate flags in place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* end up 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge to one
+		 * After merge, the packets status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in mbuf_flags.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit i.e. !EOP, rather than EOP one.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
@@ -625,6 +1086,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -690,6 +1163,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1
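
The l3_l4_flags_shuf table in this patch is easier to read as its scalar
equivalent. A sketch, assuming the status_error0 bit layout from patch 01
(bit 4 = IP csum error, bit 5 = L4 csum error, bit 6 = outer-IP csum
error); the vector code stores each table entry shifted right by one bit
so it fits in a byte, then shifts left again after the shuffle:

#include <stdint.h>
#include <rte_mbuf.h>

static inline uint64_t
l3_l4_flags_scalar(uint16_t stat_err0)
{
	/* index with the three XSUM error bits: IPE, L4E, EIPE */
	static const uint64_t tbl[8] = {
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
		PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_GOOD,
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_BAD,
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
			PKT_RX_EIP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_GOOD |
			PKT_RX_EIP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
	};

	return tbl[(stat_err0 >> 4) & 0x7];
}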


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 05/11] net/iavf: flexible Rx descriptor support in SSE path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (3 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 06/11] net/iavf: add flow director enabled switch value Leyi Rong
                     ` (5 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the SSE
path of the iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 34c41d104..09ce5e3f3 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2085,7 +2085,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2098,7 +2098,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 8e1db2588..290dd68c1 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -406,9 +406,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bit6:4 for checksum.
+	 * bit12 for RSS indication.
+	 * bit13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flag
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts shall be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packet in one loop
+	 * [A*. mask out 4 unused dirty field in desc]
+	 * B. copy 4 mbuf point from swring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf points */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* A.1 load desc[1] */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf point into rx_pkts  */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 06/11] net/iavf: add flow director enabled switch value
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (4 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 07/11] net/iavf: support flow mark in normal data path Leyi Rong
                     ` (4 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_rx_queue structure
to identify whether FDIR ID parsing is active. The Rx data path
benefits when FDIR ID parsing can be skipped, especially in the
vector paths.
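
A usage sketch of the reference counting (hypothetical call sequence,
not part of the patch -- e.g. from a flow create/destroy path):

	iavf_fdir_rx_proc_enable(ad, true);  /* rule #1: ref 0->1, queues on */
	iavf_fdir_rx_proc_enable(ad, true);  /* rule #2: ref 1->2, no-op */
	iavf_fdir_rx_proc_enable(ad, false); /* del #2:  ref 2->1, still on */
	iavf_fdir_rx_proc_enable(ad, false); /* del #1:  ref 1->0, queues off */

The per-queue fdir_enabled flag only toggles on the first enable and the
last disable, so the data paths see a stable value while any FDIR rule
is installed.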

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index fdb31b929..17ceff734 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -144,6 +144,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint16_t fdir_ref_cnt;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 290dd68c1..dd5544ee2 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -103,6 +103,7 @@ struct iavf_rx_queue {
 
 	uint16_t port_id;        /* device port ID */
 	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
+	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
 	uint16_t queue_id;      /* Rx queue index */
 	uint16_t rx_buf_len;    /* The packet buffer size */
 	uint16_t rx_hdr_len;    /* The header buffer size */
@@ -490,6 +491,35 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+#define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
+	int i; \
+	for (i = 0; i < (ad)->eth_dev->data->nb_rx_queues; i++) { \
+		struct iavf_rx_queue *rxq = (ad)->eth_dev->data->rx_queues[i]; \
+		if (!rxq) \
+			continue; \
+		rxq->fdir_enabled = on; \
+	} \
+	PMD_DRV_LOG(DEBUG, "FDIR processing on RX set to %d", on); \
+} while (0)
+
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	if (on) {
+		/* enable flow director processing */
+		if (ad->fdir_ref_cnt++ == 0)
+			FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+	} else {
+		if (ad->fdir_ref_cnt >= 1) {
+			ad->fdir_ref_cnt--;
+
+			if (ad->fdir_ref_cnt == 0)
+				FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 07/11] net/iavf: support flow mark in normal data path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (5 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 06/11] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 08/11] net/iavf: support flow mark in AVX path Leyi Rong
                     ` (3 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.
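
For context, an application would consume the parsed mark roughly like
this (illustrative sketch, assuming a rte_flow rule with a MARK action
is already installed; handle_mark() is hypothetical):

	struct rte_mbuf *pkts[32];
	uint16_t i, nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);

	for (i = 0; i < nb; i++) {
		if ((pkts[i]->ol_flags & (PKT_RX_FDIR | PKT_RX_FDIR_ID)) ==
		    (PKT_RX_FDIR | PKT_RX_FDIR_ID))
			handle_mark(pkts[i]->hash.fdir.hi);
	}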

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 37 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 17ceff734..78bdaff20 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,9 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 09ce5e3f3..725dd9e45 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -756,6 +756,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -776,6 +780,25 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+
+	return flags;
+}
+
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -792,6 +815,11 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
 #endif
+
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
 }
 
 #define IAVF_RX_FLEX_ERR0_BITS	\
@@ -951,6 +979,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1349,6 +1380,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1515,6 +1549,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 08/11] net/iavf: support flow mark in AVX path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (6 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 07/11] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 09/11] net/iavf: support flow mark in SSE path Leyi Rong
                     ` (2 subsequent siblings)
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.
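
The vector helper below sets both flags with a compare plus XOR instead
of a branch; a scalar equivalent (a sketch only, not part of the patch)
would be:

	/* flow_id == 0xFFFFFFFF is the "no FDIR match" sentinel */
	static inline uint64_t
	flex_rxd_to_fdir_flags_scalar(uint32_t flow_id)
	{
		return (flow_id == 0xFFFFFFFF) ?
			0 : (PKT_RX_FDIR | PKT_RX_FDIR_ID);
	}

In the SIMD form, _mm256_cmpeq_epi32 yields all-ones lanes for
mismatches, the XOR against 0xFFFFFFFF inverts that into all-ones for
matches, and the AND keeps only the two flag bits.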

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 72 +++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index b23188fd3..3bf5833fa 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -616,6 +616,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* this XOR op inverts the fdir_mask bits */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -678,8 +697,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -687,8 +706,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -930,8 +949,51 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			rx_pkts[i + 0]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 3);
+
+			rx_pkts[i + 1]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 7);
+
+			rx_pkts[i + 2]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 2);
+
+			rx_pkts[i + 3]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 6);
+
+			rx_pkts[i + 4]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 1);
+
+			rx_pkts[i + 5]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 5);
+
+			rx_pkts[i + 6]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 0);
+
+			rx_pkts[i + 7]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 4);
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 09/11] net/iavf: support flow mark in SSE path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (7 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 08/11] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 52 +++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..d7d840853 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* this XOR op inverts the fdir_mask bits */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,36 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf */
+		rx_pkts[0]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 0);
+
+		rx_pkts[1]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 1);
+
+		rx_pkts[2]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 2);
+
+		rx_pkts[3]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 3);
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +653,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 10/11] net/iavf: add RSS hash parsing in AVX path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (8 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 09/11] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.
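
The hash is only parsed when the application asked for it, e.g.
(hypothetical configuration sketch):

	struct rte_eth_conf conf = { 0 };

	conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
	conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
	/* per packet: if (mb->ol_flags & PKT_RX_RSS_HASH) use mb->hash.rss */

Keeping the extra 16B descriptor loads behind this check avoids the cost
for applications that never read mbuf->hash.rss.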

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 3bf5833fa..22f1b7887 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
+		/**
+		 * need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * entering this branch costs extra cycles.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * shift the 32b RSS hash value into the highest
+			 * 32b of each 128b lane before masking
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v4 11/11] net/iavf: add RSS hash parsing in SSE path
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (9 preceding siblings ...)
  2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-04-14  6:15   ` Leyi Rong
  10 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-14  6:15 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index d7d840853..85b50a7f1 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -654,7 +654,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -745,7 +745,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -791,8 +791,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -802,12 +806,66 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+		/**
+		 * need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * entering this branch costs extra cycles.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * shift the 32b RSS hash value into the highest
+			 * 32b of each 128b lane before masking
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -815,14 +873,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -846,9 +900,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (14 preceding siblings ...)
  2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-16  8:09 ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
                     ` (11 more replies)
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
  16 siblings, 12 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enables the framework for advanced iAVF, including
flexible descriptor support, FDIR mark ID and RSS hash support.

---
v5:
- Set rxdid = 0 in iavf_configure_queues() when it's supported and
  RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y. So the error can be captured
  explicitly as kernel PF does not support it yet.
- Fix some build errors when RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y.

v4:
- Avoid initial declarations in 'for' loops to stay backward-compatible
  with older GCC versions.

v3:
- Remove patch to query DDP package info as it's specific to DCF.
- Specify fdir_ref_cnt as a per-VF value for enabling/disabling FDIR ID
  parsing.
- Move the fdir_enabled flag to a per-queue value for cache benefit when
  accessed in vector routines.
- Store the extracted flow_id value to the mbuf unconditionally to avoid
  branch misspeculation, which benefits performance.

v2:
- Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
  it's defined in AVF spec.
- Typo RTE_LIBRTE_ICE_16BYTE_RX_DESC fix in iavf_rxtx.c.
- Move flex desc definitions into iavf_rxtx.h.
- Up to date to match with the latest version of virtchnl.h.
- Extract a new internal func iavf_update_rx_tail to call.
- Remove
  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
  as the accompanying legacy ones can deal with the flex desc cases.
- Move rxq->rxdid assignment from iavf_configure_queues()
  to iavf_dev_rx_queue_setup().
- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
  when using GCC compile option -O0.

Leyi Rong (11):
  net/iavf: flexible Rx descriptor definitions
  net/iavf: return error if opcode is mismatched
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 drivers/net/iavf/iavf.h               |   6 +
 drivers/net/iavf/iavf_ethdev.c        |   8 +
 drivers/net/iavf/iavf_rxtx.c          | 546 ++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx.h          | 250 +++++++++
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 702 +++++++++++++++++++++++++-
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 520 +++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  59 ++-
 7 files changed, 2042 insertions(+), 49 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 01/11] net/iavf: flexible Rx descriptor definitions
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add definitions for flexible Rx descriptor structures and macros.
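
An illustrative use of the masks defined here (a sketch, not part of the
patch; 'd' is assumed to be a completed descriptor in writeback format):

	uint16_t ptype = rte_le_to_cpu_16(d->wb.ptype_flex_flags0) &
			 IAVF_RX_FLEX_DESC_PTYPE_M;	/* low 10 bits */
	uint16_t plen = rte_le_to_cpu_16(d->wb.pkt_len) &
			IAVF_RX_FLX_DESC_PKT_LEN_M;	/* low 14 bits */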

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.h | 200 +++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..d04233bf8 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -157,6 +157,206 @@ union iavf_tx_offload {
 	};
 };
 
+/* Rx Flex Descriptors
+ * These descriptors are used instead of the legacy version descriptors
+ */
+union iavf_16b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+	} wb; /* writeback */
+};
+
+union iavf_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor
+ * RxDID Profile ID 16-21
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Flow ID upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor
+ * RxDID Profile ID 22-23 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms_ovs {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 flow_id;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 rss_hash;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum iavf_rxdid {
+	IAVF_RXDID_LEGACY_0		= 0,
+	IAVF_RXDID_LEGACY_1		= 1,
+	IAVF_RXDID_FLEX_NIC		= 2,
+	IAVF_RXDID_FLEX_NIC_2		= 6,
+	IAVF_RXDID_HW			= 7,
+	IAVF_RXDID_COMMS_GENERIC	= 16,
+	IAVF_RXDID_COMMS_AUX_VLAN	= 17,
+	IAVF_RXDID_COMMS_AUX_IPV4	= 18,
+	IAVF_RXDID_COMMS_AUX_IPV6	= 19,
+	IAVF_RXDID_COMMS_AUX_IPV6_FLOW	= 20,
+	IAVF_RXDID_COMMS_AUX_TCP	= 21,
+	IAVF_RXDID_COMMS_OVS_1		= 22,
+	IAVF_RXDID_COMMS_OVS_2		= 23,
+	IAVF_RXDID_LAST			= 63,
+};
+
+enum iavf_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	IAVF_RX_FLEX_DESC_STATUS0_EOF_S,
+	IAVF_RX_FLEX_DESC_STATUS0_HBO_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LPBK_S,
+	IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RXE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_CRCP_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define IAVF_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for iavf_32b_rx_flex_desc.pkt_len member */
+#define IAVF_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
 int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 02/11] net/iavf: return error if opcode is mismatched
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Return an error when the opcode of the message read from the
adminQ does not match the pending command.
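
A condensed sketch of the intended caller behavior (illustrative only;
the actual polling loop lives in iavf_execute_vf_cmd(), and the delay
and retry constants are assumptions):

	do {
		ret = iavf_read_msg_from_pf(adapter, args->out_size,
					    args->out_buffer);
		if (ret == IAVF_SUCCESS)
			break;	/* reply for the pending command */
		/* IAVF_ERR_OPCODE_MISMATCH: stale or foreign message,
		 * keep polling instead of treating it as completion.
		 */
		rte_delay_ms(ASQ_DELAY_MS);
	} while (i++ < MAX_TRY_TIMES);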

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 03/11] net/iavf: flexible Rx descriptor support in normal path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in normal
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 485 ++++++++++++++++++++++++++++++---
 drivers/net/iavf/iavf_rxtx.h   |   9 +
 drivers/net/iavf/iavf_vchnl.c  |  55 +++-
 5 files changed, 521 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 368ddf604..fdb31b929 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -103,6 +103,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -235,6 +236,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 9dadee3d6..4280a92bb 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1245,6 +1245,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..2eac1de2b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -346,6 +346,14 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	if (vf->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+	} else {
+		rxq->rxdid = IAVF_RXDID_LEGACY_1;
+	}
+
 	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_free_thresh;
@@ -720,6 +728,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -754,6 +776,87 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+/* Translate the rx flex descriptor fields (RSS hash) to mbuf fields */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err;
+
+	stat_err = rte_le_to_cpu_16(desc->status_error0);
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+}
+#endif
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+/* If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register. Update the RDT with the value of the last processed RX
+ * descriptor minus 1, to guarantee that the RDT register is never
+ * equal to the RDH register, which creates a "full" ring situation
+ * from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG,
+			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -854,23 +957,260 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	}
 	rxq->rx_tail = rx_id;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the receive tail register of queue.
-	 * Update that register with the value of the last processed RX
-	 * descriptor minus 1.
-	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)((rx_id == 0) ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+#endif
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
 	}
-	rxq->nb_rx_hold = nb_hold;
+	rxq->rx_tail = rx_id;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+#endif
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
 	return nb_rx;
 }
@@ -1027,30 +1367,90 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->pkt_first_seg = first_seg;
 	rxq->pkt_last_seg = last_seg;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-	 * register. Update the RDT with the value of the last processed RX
-	 * descriptor minus 1, to guarantee that the RDT register is never
-	 * equal to the RDH register, which creates a "full" ring situtation
-	 * from the hardware point of view.
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
 	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)(rx_id == 0 ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+#endif
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
 	}
-	rxq->nb_rx_hold = nb_hold;
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1219,7 +1619,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1663,6 +2066,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1702,7 +2106,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1710,7 +2117,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1797,6 +2207,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index d04233bf8..8246e797f 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -60,8 +60,10 @@
 /* HW desc structure, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
+#define iavf_rx_flex_desc iavf_16b_rx_flex_desc
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 #endif
 
 struct iavf_rxq_ops {
@@ -87,6 +89,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -379,9 +382,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..b913f06c3 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -375,6 +377,32 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_SUPPORTED_RXDIDS");
+		return ret;
+	}
+
+	vf->supported_rxdid =
+		((struct virtchnl_supported_rxdids *)args.out_buffer)->supported_rxdids;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -567,6 +595,31 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
+#else
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_LEGACY_0)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_0;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				PMD_DRV_LOG(ERR, "RXDID == 0 is not supported");
+				return -1;
+			}
+#endif
 		}
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (2 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-17 13:28     ` Ferruh Yigit
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                     ` (7 subsequent siblings)
  11 siblings, 1 reply; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the AVX2 Rx path
of the iAVF PMD.
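
As a scalar reference for the shuffle-based unpacking below (a sketch;
field names follow the flex descriptor definitions from earlier in this
series, and the cast assumes the 32B OVS comms profile):

	volatile struct iavf_32b_rx_flex_desc_comms_ovs *ovs =
		(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;

	/* bytes 4~5: packet length, masked, minus CRC when it is kept */
	mb->pkt_len = (rte_le_to_cpu_16(rxdp->wb.pkt_len) &
		       IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
	mb->data_len = mb->pkt_len;
	/* bytes 10~11: stripped VLAN tag */
	mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.l2tag1);
	/* bytes 12~15: RSS hash */
	mb->hash.rss = rte_le_to_cpu_32(ovs->rss_hash);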

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++-
 3 files changed, 570 insertions(+), 10 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 2eac1de2b..8a1292823 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2087,16 +2087,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 8246e797f..84ec39132 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -414,9 +414,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..b23188fd3 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -11,14 +11,16 @@
 #endif
 
 static inline void
-iavf_rxq_rearm(struct iavf_rx_queue *rxq)
+iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
 {
 	int i;
 	uint16_t rx_id;
-	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
 
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	/* Only the read-format pkt_addr/hdr_addr fields are written while
+	 * rearming; they sit at the same offsets in the legacy and flexible
+	 * descriptor layouts, so no RXDID-specific handling is needed here.
+	 */
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mp,
@@ -160,7 +162,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	 * of time to act
 	 */
 	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
-		iavf_rxq_rearm(rxq);
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
 
 	/* Before we start moving massive data around, check to see if
 	 * there is actually a packet available
@@ -614,6 +616,464 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits.  This gives us the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by result of flag mask, shifted down 12.
+	 * If RSS(bit12)/VLAN(bit13) are set,
+	 * shuffle moves appropriate flags in place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* end up 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge to one
+		 * After merge, the packets status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in vlan0.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit i.e. !EOP, rather than EOP one.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
@@ -625,6 +1086,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -690,6 +1163,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble then */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 05/11] net/iavf: flexible Rx descriptor support in SSE path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (3 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 06/11] net/iavf: add flow director enabled switch value Leyi Rong
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support the flexible Rx descriptor format in the SSE Rx path
of the iAVF PMD.
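
As a scalar reference for flex_desc_to_ptype_v() below (a sketch using
the names from the earlier patches in this series):

	/* The packet type sits in the low bits of ptype_flex_flags0 and
	 * is translated through the adapter's software ptype table.
	 */
	mb->packet_type =
		type_table[rte_le_to_cpu_16(rxdp->wb.ptype_flex_flags0) &
			   IAVF_RX_FLEX_DESC_PTYPE_M];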

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 8a1292823..92bf4cf8b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2091,7 +2091,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2104,7 +2104,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 84ec39132..9bc857312 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -407,9 +407,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bit6:4 for checksum.
+	 * bit12 for RSS indication.
+	 * bit13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flag
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts shall be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packets in one loop
+	 * [A*. mask out 4 unused dirty fields in desc]
+	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf pointers */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* A.1 load desc[1] */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 06/11] net/iavf: add flow director enabled switch value
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (4 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 07/11] net/iavf: support flow mark in normal data path Leyi Rong
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_rx_queue structure
to identify whether FDIR ID parsing is active. The Rx data path can
benefit when FDIR ID parsing is not needed, especially in the vector
paths.
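
For illustration, a minimal sketch of how the reference count is meant
to be driven (the rule create/destroy callbacks are hypothetical; only
iavf_fdir_rx_proc_enable() comes from this patch):

static int
example_fdir_rule_create(struct iavf_adapter *ad)
{
	/* the first rule flips fdir_enabled on every Rx queue;
	 * subsequent rules only bump the per-VF counter
	 */
	iavf_fdir_rx_proc_enable(ad, true);
	return 0;
}

static int
example_fdir_rule_destroy(struct iavf_adapter *ad)
{
	/* only the last rule removed clears the per-queue flag */
	iavf_fdir_rx_proc_enable(ad, false);
	return 0;
}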

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index fdb31b929..17ceff734 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -144,6 +144,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint16_t fdir_ref_cnt;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 9bc857312..73968847f 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -104,6 +104,7 @@ struct iavf_rx_queue {
 
 	uint16_t port_id;        /* device port ID */
 	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
+	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
 	uint16_t queue_id;      /* Rx queue index */
 	uint16_t rx_buf_len;    /* The packet buffer size */
 	uint16_t rx_hdr_len;    /* The header buffer size */
@@ -491,6 +492,35 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+#define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
+	int i; \
+	for (i = 0; i < (ad)->eth_dev->data->nb_rx_queues; i++) { \
+		struct iavf_rx_queue *rxq = (ad)->eth_dev->data->rx_queues[i]; \
+		if (!rxq) \
+			continue; \
+		rxq->fdir_enabled = on; \
+	} \
+	PMD_DRV_LOG(DEBUG, "FDIR processing on RX set to %d", on); \
+} while (0)
+
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	if (on) {
+		/* enable flow director processing */
+		if (ad->fdir_ref_cnt++ == 0)
+			FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+	} else {
+		if (ad->fdir_ref_cnt >= 1) {
+			ad->fdir_ref_cnt--;
+
+			if (ad->fdir_ref_cnt == 0)
+				FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 07/11] net/iavf: support flow mark in normal data path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (5 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 06/11] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 08/11] net/iavf: support flow mark in AVX path Leyi Rong
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.
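
As an application-side sketch (the helper is hypothetical and not part
of this patch), the mark ID surfaced here can be read back from the
mbuf like this:

#include <rte_mbuf.h>

static inline uint32_t
example_read_fdir_mark(const struct rte_mbuf *m)
{
	/* PKT_RX_FDIR_ID is only set when a valid FDIR ID was parsed */
	if (m->ol_flags & PKT_RX_FDIR_ID)
		return m->hash.fdir.hi;
	return UINT32_MAX; /* no flow director match on this packet */
}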

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 51 +++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 17ceff734..78bdaff20 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,9 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 92bf4cf8b..42b5f398e 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -756,6 +756,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -776,7 +780,31 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+#else
+	mb->hash.fdir.hi =
+		rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd_id);
+	flags |= PKT_RX_FDIR_ID;
+#endif
+	return flags;
+}
+
+
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -784,6 +812,7 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 {
 	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint16_t stat_err;
 
 	stat_err = rte_le_to_cpu_16(desc->status_error0);
@@ -791,9 +820,14 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->ol_flags |= PKT_RX_RSS_HASH;
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
-}
 #endif
 
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
+}
+
 #define IAVF_RX_FLEX_ERR0_BITS	\
 	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
 	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
@@ -951,6 +985,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1047,9 +1084,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		iavf_rxd_to_pkt_fields(rxm, &rxd);
-#endif
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 		rxm->ol_flags |= pkt_flags;
 
@@ -1191,9 +1226,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		iavf_rxd_to_pkt_fields(first_seg, &rxd);
-#endif
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
 		first_seg->ol_flags |= pkt_flags;
@@ -1353,6 +1386,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1428,9 +1464,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
 			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
-#endif
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
 
@@ -1521,6 +1555,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 08/11] net/iavf: support flow mark in AVX path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (6 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 07/11] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 09/11] net/iavf: support flow mark in SSE path Leyi Rong
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.
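
A scalar equivalent of the vector flag derivation below may help when
reading it (illustrative sketch only, not part of the patch):

static inline uint64_t
example_fdir_flags_scalar(uint32_t fdir_id)
{
	/* flow_id == 0xFFFFFFFF is the FDIR miss magic; anything
	 * else is a match carrying a valid mark ID
	 */
	if (fdir_id != 0xFFFFFFFF)
		return PKT_RX_FDIR | PKT_RX_FDIR_ID;
	return 0;
}

The AVX2 version computes the same result for eight descriptors at
once: cmpeq yields all-ones on a miss, XOR with the same magic inverts
that mask, and the final AND keeps the two flag bits only for matches.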

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 72 +++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index b23188fd3..3bf5833fa 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -616,6 +616,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* this XOR op inverts every bit of fdir_mask */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -678,8 +697,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -687,8 +706,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -930,8 +949,51 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			rx_pkts[i + 0]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 3);
+
+			rx_pkts[i + 1]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 7);
+
+			rx_pkts[i + 2]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 2);
+
+			rx_pkts[i + 3]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 6);
+
+			rx_pkts[i + 4]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 1);
+
+			rx_pkts[i + 5]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 5);
+
+			rx_pkts[i + 6]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 0);
+
+			rx_pkts[i + 7]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 4);
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 09/11] net/iavf: support flow mark in SSE path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (7 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 08/11] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.
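
The dword gather used below can be pictured as follows (hedged
illustration; the lane comments assume descs[i] holds the first 16B of
the writeback with flow_id in dword 3, as in the OVS flex profile):

	__m128i id01 = _mm_unpackhi_epi32(descs[0], descs[1]);
	/* = { dw2_0, dw2_1, fid_0, fid_1 } */
	__m128i id23 = _mm_unpackhi_epi32(descs[2], descs[3]);
	/* = { dw2_2, dw2_3, fid_2, fid_3 } */
	__m128i fid  = _mm_unpackhi_epi64(id01, id23);
	/* = { fid_0, fid_1, fid_2, fid_3 } */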

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 52 +++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..d7d840853 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* this XOR op inverts every bit of fdir_mask */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,36 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf */
+		rx_pkts[0]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 0);
+
+		rx_pkts[1]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 1);
+
+		rx_pkts[2]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 2);
+
+		rx_pkts[3]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 3);
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +653,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 10/11] net/iavf: add RSS hash parsing in AVX path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (8 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 09/11] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  2020-04-16 12:21   ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Zhang, Qi Z
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.
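
In scalar terms the vector code below does, per packet, no more than
the following (sketch assuming the OVS flex profile, where rss_hash
lives in qword 3):

static inline void
example_set_rss_hash(struct rte_mbuf *mb,
		     volatile struct iavf_32b_rx_flex_desc_comms_ovs *d)
{
	mb->hash.rss = rte_le_to_cpu_32(d->rss_hash);
}

The shift-left-by-32 plus mask in the vector path move that dword into
the top 32 bits of each 128-bit mbuf lane, which is where the shuffle
layout places mb->hash.rss.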

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 94 ++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 3bf5833fa..c725d9d66 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -994,6 +994,96 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		/**
+		 * need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * taking this branch costs extra loads and hurts performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before mask
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+#endif
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v5 11/11] net/iavf: add RSS hash parsing in SSE path
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (9 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-04-16  8:09   ` Leyi Rong
  2020-04-16 12:21   ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Zhang, Qi Z
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-16  8:09 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.
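
A hedged sketch of the three per-descriptor steps used below (register
names taken from the patch):

	/* qwords 2-3 were loaded as one 16B register; the RSS hash is
	 * dword 2 of that register
	 */
	__m128i h = _mm_slli_epi64(raw_desc_bh0, 32); /* hash -> dword 3 */
	h = _mm_and_si128(h, _mm_set_epi32(0xFFFFFFFF, 0, 0, 0));
	pkt_mb0 = _mm_or_si128(pkt_mb0, h); /* merge into the mbuf lane */

Note that _mm_set_epi32 takes its arguments from the most significant
lane down, so the all-ones word masks bits [127:96] only.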

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 88 +++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index d7d840853..2f115aa94 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -654,7 +654,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -745,7 +745,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -791,8 +791,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -802,12 +806,68 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		/**
+		 * need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * taking this branch costs extra loads and hurts performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * to shift the 32b RSS hash value to the
+			 * highest 32b of each 128b before mask
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
+#endif
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -815,14 +875,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -846,9 +902,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
                     ` (10 preceding siblings ...)
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
@ 2020-04-16 12:21   ` Zhang, Qi Z
  2020-04-17  4:11     ` Ye Xiaolong
  11 siblings, 1 reply; 80+ messages in thread
From: Zhang, Qi Z @ 2020-04-16 12:21 UTC (permalink / raw)
  To: Rong, Leyi, Wu, Jingjing, Xing, Beilei, Ye, Xiaolong; +Cc: dev



> -----Original Message-----
> From: Rong, Leyi <leyi.rong@intel.com>
> Sent: Thursday, April 16, 2020 4:10 PM
> To: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Ye, Xiaolong
> <xiaolong.ye@intel.com>
> Cc: dev@dpdk.org; Rong, Leyi <leyi.rong@intel.com>
> Subject: [PATCH v5 00/11] framework for advanced iAVF PMD
> 
> This patchset enable framework for advanced iAVF, includes flexible
> descriptor support, FDIR mark id and RSS hash support.
> 
> ---
> v5:
> - Set rxdid = 0 in iavf_configure_queues() when it's supported and
>   RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y. So the error can be captured
>   explicitly as kernel PF does not support it yet.
> - Fix some build errors when RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y.
> 
> v4:
> - Avoid initial declaration in 'for' loop to backward-compatible
>   with older gcc versions.
> 
> v3:
> - Remove patch to query DDP package info as it's specific to DCF.
> - Specify fdir_ref_cnt as per VF value for enable/disable FDIR ID parse.
> - Move fdir_enabled flag to per Queue value for cache benefit when
>   access in vector routines.
> - Store extracted flow_id value to mbuf without judgement to avoid
>   branch speculation fail, which is benefit to performance.
> 
> v2:
> - Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
>   it's defined in AVF spec.
> - Typo RTE_LIBRTE_ICE_16BYTE_RX_DESC fix in iavf_rxtx.c.
> - Move flex desc definitions into iavf_rxtx.h.
> - Up to date to match with the latest version of virtchnl.h.
> - Extract a new internal func iavf_update_rx_tail to call.
> - Remove
>   iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
>   as the accompanying legacy ones can deal with the flex desc cases.
> - Move rxq->rxdid assignment from iavf_configure_queues()
>   to iavf_dev_rx_queue_setup().
> - Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
>   when using GCC compile option -O0.
> 
> Leyi Rong (11):
>   net/iavf: flexible Rx descriptor definitions
>   net/iavf: return error if opcode is mismatched
>   net/iavf: flexible Rx descriptor support in normal path
>   net/iavf: flexible Rx descriptor support in AVX path
>   net/iavf: flexible Rx descriptor support in SSE path
>   net/iavf: add flow director enabled switch value
>   net/iavf: support flow mark in normal data path
>   net/iavf: support flow mark in AVX path
>   net/iavf: support flow mark in SSE path
>   net/iavf: add RSS hash parsing in AVX path
>   net/iavf: add RSS hash parsing in SSE path
> 
>  drivers/net/iavf/iavf.h               |   6 +
>  drivers/net/iavf/iavf_ethdev.c        |   8 +
>  drivers/net/iavf/iavf_rxtx.c          | 546 ++++++++++++++++++--
>  drivers/net/iavf/iavf_rxtx.h          | 250 +++++++++
>  drivers/net/iavf/iavf_rxtx_vec_avx2.c | 702
> +++++++++++++++++++++++++-  drivers/net/iavf/iavf_rxtx_vec_sse.c  |
> 520 +++++++++++++++++++
>  drivers/net/iavf/iavf_vchnl.c         |  59 ++-
>  7 files changed, 2042 insertions(+), 49 deletions(-)
> 
> --
> 2.17.1

Reviewed-by: Qi Zhang <qi.z.zhang@intel.com>



^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD
  2020-04-16 12:21   ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Zhang, Qi Z
@ 2020-04-17  4:11     ` Ye Xiaolong
  0 siblings, 0 replies; 80+ messages in thread
From: Ye Xiaolong @ 2020-04-17  4:11 UTC (permalink / raw)
  To: Zhang, Qi Z; +Cc: Rong, Leyi, Wu, Jingjing, Xing, Beilei, dev

On 04/16, Zhang, Qi Z wrote:
>
>
>> -----Original Message-----
>> From: Rong, Leyi <leyi.rong@intel.com>
>> Sent: Thursday, April 16, 2020 4:10 PM
>> To: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Ye, Xiaolong
>> <xiaolong.ye@intel.com>
>> Cc: dev@dpdk.org; Rong, Leyi <leyi.rong@intel.com>
>> Subject: [PATCH v5 00/11] framework for advanced iAVF PMD
>> 
>> This patchset enable framework for advanced iAVF, includes flexible
>> descriptor support, FDIR mark id and RSS hash support.
>> 
>> ---
>> v5:
>> - Set rxdid = 0 in iavf_configure_queues() when it's supported and
>>   RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y. So the error can be captured
>>   explicitly as kernel PF does not support it yet.
>> - Fix some build errors when RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y.
>> 
>> v4:
>> - Avoid initial declaration in 'for' loop to backward-compatible
>>   with older gcc versions.
>> 
>> v3:
>> - Remove patch to query DDP package info as it's specific to DCF.
>> - Specify fdir_ref_cnt as per VF value for enable/disable FDIR ID parse.
>> - Move fdir_enabled flag to per Queue value for cache benefit when
>>   access in vector routines.
>> - Store extracted flow_id value to mbuf without judgement to avoid
>>   branch speculation fail, which is benefit to performance.
>> 
>> v2:
>> - Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
>>   it's defined in AVF spec.
>> - Typo RTE_LIBRTE_ICE_16BYTE_RX_DESC fix in iavf_rxtx.c.
>> - Move flex desc definitions into iavf_rxtx.h.
>> - Up to date to match with the latest version of virtchnl.h.
>> - Extract a new internal func iavf_update_rx_tail to call.
>> - Remove
>>   iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
>>   as the accompanying legacy ones can deal with the flex desc cases.
>> - Move rxq->rxdid assignment from iavf_configure_queues()
>>   to iavf_dev_rx_queue_setup().
>> - Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
>>   when using GCC compile option -O0.
>> 
>> Leyi Rong (11):
>>   net/iavf: flexible Rx descriptor definitions
>>   net/iavf: return error if opcode is mismatched
>>   net/iavf: flexible Rx descriptor support in normal path
>>   net/iavf: flexible Rx descriptor support in AVX path
>>   net/iavf: flexible Rx descriptor support in SSE path
>>   net/iavf: add flow director enabled switch value
>>   net/iavf: support flow mark in normal data path
>>   net/iavf: support flow mark in AVX path
>>   net/iavf: support flow mark in SSE path
>>   net/iavf: add RSS hash parsing in AVX path
>>   net/iavf: add RSS hash parsing in SSE path
>> 
>>  drivers/net/iavf/iavf.h               |   6 +
>>  drivers/net/iavf/iavf_ethdev.c        |   8 +
>>  drivers/net/iavf/iavf_rxtx.c          | 546 ++++++++++++++++++--
>>  drivers/net/iavf/iavf_rxtx.h          | 250 +++++++++
>>  drivers/net/iavf/iavf_rxtx_vec_avx2.c | 702
>> +++++++++++++++++++++++++-  drivers/net/iavf/iavf_rxtx_vec_sse.c  |
>> 520 +++++++++++++++++++
>>  drivers/net/iavf/iavf_vchnl.c         |  59 ++-
>>  7 files changed, 2042 insertions(+), 49 deletions(-)
>> 
>> --
>> 2.17.1
>
>Reviewed-by: Qi Zhang <qi.z.zhang@intel.com>
>
>

Applied to dpdk-next-net-intel, Thanks.

^ permalink raw reply	[flat|nested] 80+ messages in thread

* Re: [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path
  2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-04-17 13:28     ` Ferruh Yigit
  0 siblings, 0 replies; 80+ messages in thread
From: Ferruh Yigit @ 2020-04-17 13:28 UTC (permalink / raw)
  To: Leyi Rong, jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev

On 4/16/2020 9:09 AM, Leyi Rong wrote:
> Support flexible Rx descriptor format in AVX
> path of iAVF PMD.
> 
> Signed-off-by: Leyi Rong <leyi.rong@intel.com>

<...>

> --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
> +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
> @@ -11,14 +11,16 @@
>  #endif
>  
>  static inline void
> -iavf_rxq_rearm(struct iavf_rx_queue *rxq)
> +iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp)
>  {
>  	int i;
>  	uint16_t rx_id;
> -	volatile union iavf_rx_desc *rxdp;
>  	struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];
>  
> -	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1) {
> +		volatile union iavf_rx_flex_desc *rxdp =
> +			(union iavf_rx_flex_desc *)rxdp;
> +	}

Hi Leyi,

This code is causing a build error in icc [1], but what does this code
actually do? It creates a local version of the variable 'rxdp', which is
confusing, and that variable's scope is only the if block, so it looks
like this code really doesn't do anything at all. Am I missing something?

[1]
.../drivers/net/iavf/iavf_rxtx_vec_avx2.c(22): warning #592: variable "rxdp" is
used before its value is set
    (union iavf_rx_flex_desc *)rxdp;

^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 00/11] framework for advanced iAVF PMD
  2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
                   ` (15 preceding siblings ...)
  2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
@ 2020-04-20  6:16 ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
                     ` (11 more replies)
  16 siblings, 12 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This patchset enable framework for advanced iAVF, includes
flexible descriptor support, FDIR mark id and RSS hash support.

---
v6:
- Remove some redundant code which may cause ICC build error.

v5:
- Set rxdid = 0 in iavf_configure_queues() when it's supported and
  RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y. So the error can be captured
  explicitly as kernel PF does not support it yet.
- Fix some build errors when RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y.

v4:
- Avoid initial declaration in 'for' loop to backward-compatible
  with older gcc versions.

v3:
- Remove patch to query DDP package info as it's specific to DCF.
- Specify fdir_ref_cnt as a per-VF value to enable/disable FDIR ID parsing.
- Move the fdir_enabled flag to a per-queue value for cache benefit when
  accessed in vector routines.
- Store the extracted flow_id value to the mbuf unconditionally to avoid
  branch misspeculation, which benefits performance.

v2:
- Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
  it's defined in AVF spec.
- Typo RTE_LIBRTE_ICE_16BYTE_RX_DESC fix in iavf_rxtx.c.
- Move flex desc definitions into iavf_rxtx.h.
- Update to match the latest version of virtchnl.h.
- Extract a new internal func iavf_update_rx_tail to call.
- Remove
  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
  as the accompanying legacy ones can deal with the flex desc cases.
- Move rxq->rxdid assignment from iavf_configure_queues()
  to iavf_dev_rx_queue_setup().
- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
  when using GCC compile option -O0.

Leyi Rong (11):
  net/iavf: flexible Rx descriptor definitions
  net/iavf: return error if opcode is mismatched
  net/iavf: flexible Rx descriptor support in normal path
  net/iavf: flexible Rx descriptor support in AVX path
  net/iavf: flexible Rx descriptor support in SSE path
  net/iavf: add flow director enabled switch value
  net/iavf: support flow mark in normal data path
  net/iavf: support flow mark in AVX path
  net/iavf: support flow mark in SSE path
  net/iavf: add RSS hash parsing in AVX path
  net/iavf: add RSS hash parsing in SSE path

 drivers/net/iavf/iavf.h               |   6 +
 drivers/net/iavf/iavf_ethdev.c        |   8 +
 drivers/net/iavf/iavf_rxtx.c          | 546 ++++++++++++++++++--
 drivers/net/iavf/iavf_rxtx.h          | 250 ++++++++++
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 691 ++++++++++++++++++++++++++
 drivers/net/iavf/iavf_rxtx_vec_sse.c  | 520 +++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c         |  59 ++-
 7 files changed, 2035 insertions(+), 45 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 01/11] net/iavf: flexible Rx descriptor definitions
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
                     ` (10 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add definitions for flexible Rx descriptor structures and macros.
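
As a usage sketch (the helper is hypothetical; the union and macros
come from this patch), the 10-bit ptype and 14-bit packet length are
extracted from the writeback words like this:

static inline void
example_parse_flex_desc(volatile union iavf_32b_rx_flex_desc *rxd,
			uint16_t *ptype, uint16_t *pkt_len)
{
	*ptype = rte_le_to_cpu_16(rxd->wb.ptype_flex_flags0) &
		 IAVF_RX_FLEX_DESC_PTYPE_M;
	*pkt_len = rte_le_to_cpu_16(rxd->wb.pkt_len) &
		   IAVF_RX_FLX_DESC_PKT_LEN_M;
}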

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx.h | 200 +++++++++++++++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 09b5bd99e..d04233bf8 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -157,6 +157,206 @@ union iavf_tx_offload {
 	};
 };
 
+/* Rx Flex Descriptors
+ * These descriptors are used instead of the legacy version descriptors
+ */
+union iavf_16b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+	} wb; /* writeback */
+};
+
+union iavf_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor
+ * RxDID Profile ID 16-21
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Flow ID upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor
+ * RxDID Profile ID 22-23 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct iavf_32b_rx_flex_desc_comms_ovs {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 flow_id;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 rss_hash;
+	union {
+		struct {
+			__le16 aux0;
+			__le16 aux1;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum iavf_rxdid {
+	IAVF_RXDID_LEGACY_0		= 0,
+	IAVF_RXDID_LEGACY_1		= 1,
+	IAVF_RXDID_FLEX_NIC		= 2,
+	IAVF_RXDID_FLEX_NIC_2		= 6,
+	IAVF_RXDID_HW			= 7,
+	IAVF_RXDID_COMMS_GENERIC	= 16,
+	IAVF_RXDID_COMMS_AUX_VLAN	= 17,
+	IAVF_RXDID_COMMS_AUX_IPV4	= 18,
+	IAVF_RXDID_COMMS_AUX_IPV6	= 19,
+	IAVF_RXDID_COMMS_AUX_IPV6_FLOW	= 20,
+	IAVF_RXDID_COMMS_AUX_TCP	= 21,
+	IAVF_RXDID_COMMS_OVS_1		= 22,
+	IAVF_RXDID_COMMS_OVS_2		= 23,
+	IAVF_RXDID_LAST			= 63,
+};
+
+enum iavf_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	IAVF_RX_FLEX_DESC_STATUS0_EOF_S,
+	IAVF_RX_FLEX_DESC_STATUS0_HBO_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LPBK_S,
+	IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RXE_S,
+	IAVF_RX_FLEX_DESC_STATUS0_CRCP_S,
+	IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	IAVF_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+/* for iavf_32b_rx_flex_desc.ptype_flex_flags0 member */
+#define IAVF_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for iavf_32b_rx_flex_desc.pkt_len member */
+#define IAVF_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
 int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
 			   uint16_t queue_idx,
 			   uint16_t nb_desc,
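
A note on the layouts above: the comms profiles (RxDID 16-21) carry the
RSS hash in Qword 1 and the flow ID in Qword 3, while the OVS profiles
(RxDID 22-23) swap the two. Reading a field back follows directly from
the structs; a minimal sketch, assuming a completed (DD-set) 32-byte
writeback descriptor, with an illustrative helper name:

    /* illustrative helper, not part of the patch */
    static inline uint32_t
    ovs_desc_rss_hash(volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc)
    {
        uint16_t stat_err0 = rte_le_to_cpu_16(desc->status_error0);

        /* the hash is only meaningful when RSS_VALID is set */
        if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S)))
            return 0;

        return rte_le_to_cpu_32(desc->rss_hash);
    }

For RxDID 16-21 the same read goes through struct
iavf_32b_rx_flex_desc_comms, where rss_hash sits in Qword 1 and flow_id
in Qword 3.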
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 02/11] net/iavf: return error if opcode is mismatched
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
                     ` (9 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Add an error return when the opcode of the message read
from the adminQ does not match the pending command.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
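With this return value a polling caller can tell "no matching reply
yet" apart from a real reply, instead of treating an unrelated adminQ
message as the response to the pending command. A rough caller-side
sketch (the wrapper name is hypothetical; the constants and status
codes are the ones iavf_vchnl.c already uses):

    static int
    wait_for_pending_reply(struct iavf_adapter *adapter,
                           struct iavf_cmd_info *args)
    {
        int i, ret = IAVF_ERR_OPCODE_MISMATCH;

        for (i = 0; i < MAX_TRY_TIMES; i++) {
            ret = iavf_read_msg_from_pf(adapter, args->out_size,
                                        args->out_buffer);
            if (ret == IAVF_SUCCESS)
                break;  /* reply to the pending command */
            /* mismatched opcode or queue empty: poll again */
            rte_delay_ms(ASQ_DELAY_MS);
        }

        return ret;
    }
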
 drivers/net/iavf/iavf_vchnl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index fa4da3a6d..b7fb05d32 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len,
 	PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d",
 		    opcode, vf->cmd_retval);
 
-	if (opcode != vf->pend_cmd)
+	if (opcode != vf->pend_cmd) {
 		PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u",
 			    vf->pend_cmd, opcode);
+		return IAVF_ERR_OPCODE_MISMATCH;
+	}
 
 	return IAVF_SUCCESS;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 03/11] net/iavf: flexible Rx descriptor support in normal path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
                     ` (8 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in normal
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
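The rxq setup hunk and the VIRTCHNL_OP_CONFIG_VSI_QUEUES hunk below
make the same descriptor-format decision; condensed into one helper as
a sketch (the helper name is illustrative, everything else is from
this series):

    static uint8_t
    iavf_select_rxdid(struct iavf_info *vf)
    {
        /* flex descriptors need both the negotiated capability and
         * PF support for the requested profile in supported_rxdid
         */
        if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
            vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1))
            return IAVF_RXDID_COMMS_OVS_1;  /* 32B flex, OVS profile */

        return IAVF_RXDID_LEGACY_1;         /* 32B legacy fallback */
    }
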
 drivers/net/iavf/iavf.h        |   2 +
 drivers/net/iavf/iavf_ethdev.c |   8 +
 drivers/net/iavf/iavf_rxtx.c   | 485 ++++++++++++++++++++++++++++++---
 drivers/net/iavf/iavf_rxtx.h   |   9 +
 drivers/net/iavf/iavf_vchnl.c  |  55 +++-
 5 files changed, 521 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 368ddf604..fdb31b929 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -103,6 +103,7 @@ struct iavf_info {
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
+	uint64_t supported_rxdid;
 
 	volatile enum virtchnl_ops pend_cmd; /* pending command not finished */
 	uint32_t cmd_retval; /* return value of the cmd response from PF */
@@ -235,6 +236,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter);
 int iavf_configure_rss_lut(struct iavf_adapter *adapter);
 int iavf_configure_rss_key(struct iavf_adapter *adapter);
 int iavf_configure_queues(struct iavf_adapter *adapter);
+int iavf_get_supported_rxdid(struct iavf_adapter *adapter);
 int iavf_config_irq_map(struct iavf_adapter *adapter);
 void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add);
 int iavf_dev_link_update(struct rte_eth_dev *dev,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 9dadee3d6..4280a92bb 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1245,6 +1245,14 @@ iavf_init_vf(struct rte_eth_dev *dev)
 			goto err_rss;
 		}
 	}
+
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+		if (iavf_get_supported_rxdid(adapter) != 0) {
+			PMD_INIT_LOG(ERR, "failed to get supported rxdid");
+			goto err_rss;
+		}
+	}
+
 	return 0;
 err_rss:
 	rte_free(vf->rss_key);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 9eccb7c41..2eac1de2b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -346,6 +346,14 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 		return -ENOMEM;
 	}
 
+	if (vf->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+	    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+		rxq->rxdid = IAVF_RXDID_COMMS_OVS_1;
+	} else {
+		rxq->rxdid = IAVF_RXDID_LEGACY_1;
+	}
+
 	rxq->mp = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->rx_free_thresh = rx_free_thresh;
@@ -720,6 +728,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
 	}
 }
 
+static inline void
+iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb,
+			  volatile union iavf_rx_flex_desc *rxdp)
+{
+	if (rte_le_to_cpu_16(rxdp->wb.status_error0) &
+		(1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.l2tag1);
+	} else {
+		mb->vlan_tci = 0;
+	}
+}
+
 /* Translate the rx descriptor status and error fields to pkt flags */
 static inline uint64_t
 iavf_rxd_to_pkt_flags(uint64_t qword)
@@ -754,6 +776,87 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+/* Translate the rx flex descriptor status to pkt flags */
+static inline void
+iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
+		       volatile union iavf_rx_flex_desc *rxdp)
+{
+	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
+			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+	uint16_t stat_err;
+
+	stat_err = rte_le_to_cpu_16(desc->status_error0);
+	if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) {
+		mb->ol_flags |= PKT_RX_RSS_HASH;
+		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
+	}
+}
+#endif
+
+#define IAVF_RX_FLEX_ERR0_BITS	\
+	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) |	\
+	 (1 << IAVF_RX_FLEX_DESC_STATUS0_RXE_S))
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
+{
+	uint64_t flags = 0;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S))))
+		return 0;
+
+	if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+/* If the number of free RX descriptors is greater than the RX free
+ * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+ * register. Update the RDT with the value of the last processed RX
+ * descriptor minus 1, to guarantee that the RDT register is never
+ * equal to the RDH register, which creates a "full" ring situation
+ * from the hardware point of view.
+ */
+static inline void
+iavf_update_rx_tail(struct iavf_rx_queue *rxq, uint16_t nb_hold, uint16_t rx_id)
+{
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+
+	if (nb_hold > rxq->rx_free_thresh) {
+		PMD_RX_LOG(DEBUG,
+			   "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
+			   rxq->port_id, rxq->queue_id, rx_id, nb_hold);
+		rx_id = (uint16_t)((rx_id == 0) ?
+			(rxq->nb_rx_desc - 1) : (rx_id - 1));
+		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+}
+
 /* implement recv_pkts */
 uint16_t
 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -854,23 +957,260 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	}
 	rxq->rx_tail = rx_id;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the receive tail register of queue.
-	 * Update that register with the value of the last processed RX
-	 * descriptor minus 1.
-	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)((rx_id == 0) ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_pkts_flex_rxd(void *rx_queue,
+			struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	volatile union iavf_rx_desc *rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct iavf_rx_queue *rxq;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_eth_dev *dev;
+	struct rte_mbuf *rxm;
+	struct rte_mbuf *nmb;
+	uint16_t nb_rx;
+	uint16_t rx_stat_err0;
+	uint16_t rx_packet_len;
+	uint16_t rx_id, nb_hold;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl;
+
+	nb_rx = 0;
+	nb_hold = 0;
+	rxq = rx_queue;
+	rx_id = rxq->rx_tail;
+	rx_ring = rxq->rx_ring;
+	ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit first */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		rxm->ol_flags = 0;
+		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		iavf_rxd_to_pkt_fields(rxm, &rxd);
+#endif
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+		rxm->ol_flags |= pkt_flags;
+
+		rx_pkts[nb_rx++] = rxm;
 	}
-	rxq->nb_rx_hold = nb_hold;
+	rxq->rx_tail = rx_id;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+/* implement recv_scattered_pkts for flexible Rx descriptor */
+uint16_t
+iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	union iavf_rx_flex_desc rxd;
+	struct rte_mbuf *rxe;
+	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
+	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
+	struct rte_mbuf *nmb, *rxm;
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
+	struct rte_eth_dev *dev;
+	uint16_t rx_stat_err0;
+	uint64_t dma_addr;
+	uint64_t pkt_flags;
+
+	volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union iavf_rx_flex_desc *rxdp;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
+		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+		/* Check the DD bit */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+			break;
+		IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
+
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
+				   "queue_id=%u", rxq->port_id, rxq->queue_id);
+			dev = &rte_eth_devices[rxq->port_id];
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+
+		rxd = *rxdp;
+		nb_hold++;
+		rxe = rxq->sw_ring[rx_id];
+		rx_id++;
+		if (rx_id == rxq->nb_rx_desc)
+			rx_id = 0;
+
+		/* Prefetch next mbuf */
+		rte_prefetch0(rxq->sw_ring[rx_id]);
+
+		/* When next RX descriptor is on a cache line boundary,
+		 * prefetch the next 4 RX descriptors and next 8 pointers
+		 * to mbufs.
+		 */
+		if ((rx_id & 0x3) == 0) {
+			rte_prefetch0(&rx_ring[rx_id]);
+			rte_prefetch0(rxq->sw_ring[rx_id]);
+		}
+
+		rxm = rxe;
+		rxe = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/* Set data buffer address and data length of the mbuf */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+		rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M;
+		rxm->data_len = rx_packet_len;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+
+		/* If this is the first buffer of the received packet, set the
+		 * pointer to the first mbuf of the packet and initialize its
+		 * context. Otherwise, update the total length and the number
+		 * of segments of the current scattered packet, and update the
+		 * pointer to the last mbuf of the current packet.
+		 */
+		if (!first_seg) {
+			first_seg = rxm;
+			first_seg->nb_segs = 1;
+			first_seg->pkt_len = rx_packet_len;
+		} else {
+			first_seg->pkt_len =
+				(uint16_t)(first_seg->pkt_len +
+						rx_packet_len);
+			first_seg->nb_segs++;
+			last_seg->next = rxm;
+		}
+
+		/* If this is not the last buffer of the received packet,
+		 * update the pointer to the last mbuf of the current scattered
+		 * packet and continue to parse the RX ring.
+		 */
+		if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) {
+			last_seg = rxm;
+			continue;
+		}
+
+		/* This is the last buffer of the received packet. If the CRC
+		 * is not stripped by the hardware:
+		 *  - Subtract the CRC length from the total packet length.
+		 *  - If the last buffer only contains the whole CRC or a part
+		 *  of it, free the mbuf associated to the last buffer. If part
+		 *  of the CRC is also contained in the previous mbuf, subtract
+		 *  the length of that CRC part from the data length of the
+		 *  previous mbuf.
+		 */
+		rxm->next = NULL;
+		if (unlikely(rxq->crc_len > 0)) {
+			first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
+			if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
+				rte_pktmbuf_free_seg(rxm);
+				first_seg->nb_segs--;
+				last_seg->data_len =
+					(uint16_t)(last_seg->data_len -
+					(RTE_ETHER_CRC_LEN - rx_packet_len));
+				last_seg->next = NULL;
+			} else {
+				rxm->data_len = (uint16_t)(rx_packet_len -
+							RTE_ETHER_CRC_LEN);
+			}
+		}
+
+		first_seg->port = rxq->port_id;
+		first_seg->ol_flags = 0;
+		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
+		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		iavf_rxd_to_pkt_fields(first_seg, &rxd);
+#endif
+		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
+
+		first_seg->ol_flags |= pkt_flags;
+
+		/* Prefetch data of first segment, if configured to do so. */
+		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+					  first_seg->data_off));
+		rx_pkts[nb_rx++] = first_seg;
+		first_seg = NULL;
+	}
+
+	/* Record index of the next RX descriptor to probe. */
+	rxq->rx_tail = rx_id;
+	rxq->pkt_first_seg = first_seg;
+	rxq->pkt_last_seg = last_seg;
+
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
 
 	return nb_rx;
 }
@@ -1027,30 +1367,90 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxq->pkt_first_seg = first_seg;
 	rxq->pkt_last_seg = last_seg;
 
-	/* If the number of free RX descriptors is greater than the RX free
-	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
-	 * register. Update the RDT with the value of the last processed RX
-	 * descriptor minus 1, to guarantee that the RDT register is never
-	 * equal to the RDH register, which creates a "full" ring situtation
-	 * from the hardware point of view.
+	iavf_update_rx_tail(rxq, nb_hold, rx_id);
+
+	return nb_rx;
+}
+
+#define IAVF_LOOK_AHEAD 8
+static inline int
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **rxep;
+	struct rte_mbuf *mb;
+	uint16_t stat_err0;
+	uint16_t pkt_len;
+	int32_t s[IAVF_LOOK_AHEAD], nb_dd;
+	int32_t i, j, nb_rx = 0;
+	uint64_t pkt_flags;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+
+	rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail];
+	rxep = &rxq->sw_ring[rxq->rx_tail];
+
+	stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
+
+	/* Make sure there is at least 1 packet to receive */
+	if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* Scan LOOK_AHEAD descriptors at a time to determine which
+	 * descriptors reference packets that are ready to be received.
 	 */
-	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
-	if (nb_hold > rxq->rx_free_thresh) {
-		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
-			   "nb_hold=%u nb_rx=%u",
-			   rxq->port_id, rxq->queue_id,
-			   rx_id, nb_hold, nb_rx);
-		rx_id = (uint16_t)(rx_id == 0 ?
-			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
-		nb_hold = 0;
+	for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
+	     rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
+		/* Read desc statuses backwards to avoid race condition */
+		for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--)
+			s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+
+		rte_smp_rmb();
+
+		/* Compute how many status bits were set */
+		for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
+			nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S);
+
+		nb_rx += nb_dd;
+
+		/* Translate descriptor info to mbuf parameters */
+		for (j = 0; j < nb_dd; j++) {
+			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
+					  rxq->rx_tail +
+					  i * IAVF_LOOK_AHEAD + j);
+
+			mb = rxep[j];
+			pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) &
+				IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len;
+			mb->data_len = pkt_len;
+			mb->pkt_len = pkt_len;
+			mb->ol_flags = 0;
+
+			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
+				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
+			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
+#endif
+			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
+			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
+
+			mb->ol_flags |= pkt_flags;
+		}
+
+		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
+			rxq->rx_stage[i + j] = rxep[j];
+
+		if (nb_dd != IAVF_LOOK_AHEAD)
+			break;
 	}
-	rxq->nb_rx_hold = nb_hold;
+
+	/* Clear software ring entries */
+	for (i = 0; i < nb_rx; i++)
+		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
-#define IAVF_LOOK_AHEAD 8
 static inline int
 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 {
@@ -1219,7 +1619,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (rxq->rx_nb_avail)
 		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 
-	nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+	if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1)
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+	else
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
 	rxq->rx_next_avail = 0;
 	rxq->rx_nb_avail = nb_rx;
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
@@ -1663,6 +2066,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 {
 	struct iavf_adapter *adapter =
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 #ifdef RTE_ARCH_X86
 	struct iavf_rx_queue *rxq;
 	int i;
@@ -1702,7 +2106,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	if (dev->data->scattered_rx) {
 		PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_scattered_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_scattered_pkts;
 	} else if (adapter->rx_bulk_alloc_allowed) {
 		PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
 			    dev->data->port_id);
@@ -1710,7 +2117,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 	} else {
 		PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
 			    dev->data->port_id);
-		dev->rx_pkt_burst = iavf_recv_pkts;
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+			dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd;
+		else
+			dev->rx_pkt_burst = iavf_recv_pkts;
 	}
 }
 
@@ -1797,6 +2207,7 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	rxq = dev->data->rx_queues[queue_id];
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
+
 	while ((desc < rxq->nb_rx_desc) &&
 	       ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 		 IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index d04233bf8..8246e797f 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -60,8 +60,10 @@
 /* HW desc structure, both 16-byte and 32-byte types are supported */
 #ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 #define iavf_rx_desc iavf_16byte_rx_desc
+#define iavf_rx_flex_desc iavf_16b_rx_flex_desc
 #else
 #define iavf_rx_desc iavf_32byte_rx_desc
+#define iavf_rx_flex_desc iavf_32b_rx_flex_desc
 #endif
 
 struct iavf_rxq_ops {
@@ -87,6 +89,7 @@ struct iavf_rx_queue {
 	struct rte_mbuf *pkt_first_seg; /* first segment of current packet */
 	struct rte_mbuf *pkt_last_seg;  /* last segment of current packet */
 	struct rte_mbuf fake_mbuf;      /* dummy mbuf */
+	uint8_t rxdid;
 
 	/* used for VPMD */
 	uint16_t rxrearm_nb;       /* number of remaining to be re-armed */
@@ -379,9 +382,15 @@ void iavf_dev_tx_queue_release(void *txq);
 void iavf_stop_queues(struct rte_eth_dev *dev);
 uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		       uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue,
+				 struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts(void *rx_queue,
 				 struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts);
 uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index b7fb05d32..b913f06c3 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args)
 		break;
 	case VIRTCHNL_OP_VERSION:
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		/* for init virtchnl ops, need to poll the response */
 		do {
 			ret = iavf_read_msg_from_pf(adapter, args->out_size,
@@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	 * add advanced/optional offload capabilities
 	 */
 
-	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+		VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -375,6 +377,32 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 	return 0;
 }
 
+int
+iavf_get_supported_rxdid(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	int ret;
+
+	args.ops = VIRTCHNL_OP_GET_SUPPORTED_RXDIDS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	ret = iavf_execute_vf_cmd(adapter, &args);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_SUPPORTED_RXDIDS");
+		return ret;
+	}
+
+	vf->supported_rxdid =
+		((struct virtchnl_supported_rxdids *)args.out_buffer)->supported_rxdids;
+
+	return 0;
+}
+
 int
 iavf_enable_queues(struct iavf_adapter *adapter)
 {
@@ -567,6 +595,31 @@ iavf_configure_queues(struct iavf_adapter *adapter)
 			vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc;
 			vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
 			vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			}
+#else
+			if (vf->vf_res->vf_cap_flags &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC &&
+			    vf->supported_rxdid & BIT(IAVF_RXDID_LEGACY_0)) {
+				vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_0;
+				PMD_DRV_LOG(NOTICE, "request RXDID == %d in "
+					    "Queue[%d]", vc_qp->rxq.rxdid, i);
+			} else {
+				PMD_DRV_LOG(ERR, "RXDID == 0 is not supported");
+				return -1;
+			}
+#endif
 		}
 	}
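
A footnote on iavf_rx_scan_hw_ring_flex_rxd() above: it counts DD bits
in groups of IAVF_LOOK_AHEAD and consumes only the leading nb_dd
descriptors of a group, which relies on the hardware completing
descriptors in order. A standalone toy model of that accounting
(DD is bit 0, per IAVF_RX_FLEX_DESC_STATUS0_DD_S):

    #include <stdint.h>

    #define LOOK_AHEAD 8  /* IAVF_LOOK_AHEAD */

    /* how many of the next 8 descriptors are complete */
    static int
    count_dd(const uint16_t s[LOOK_AHEAD])
    {
        int j, nb_dd = 0;

        for (j = 0; j < LOOK_AHEAD; j++)
            nb_dd += s[j] & 0x1;  /* DD bit */

        return nb_dd;  /* the caller stops early when nb_dd != 8 */
    }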
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 04/11] net/iavf: flexible Rx descriptor support in AVX path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (2 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
                     ` (7 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in AVX
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
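The conversion below is easiest to read against its scalar equivalent:
shuf_msk moves pkt_len, l2tag1 and the Qword 1 hash word of each
descriptor straight into the mbuf's rx_descriptor_fields1. Per
descriptor it amounts to the following sketch (helper name
illustrative; field names from the comms profile, where octets 12~15
of Qword 1 hold the RSS hash):

    static void
    flex_desc_fields_to_mbuf(struct rte_mbuf *mb,
                             volatile struct iavf_32b_rx_flex_desc_comms *d,
                             uint16_t crc_len)
    {
        uint16_t pktlen = (rte_le_to_cpu_16(d->pkt_len) &
                           IAVF_RX_FLX_DESC_PKT_LEN_M) - crc_len;

        mb->pkt_len = pktlen;                         /* octets 4~5 */
        mb->data_len = pktlen;                        /* octets 4~5 */
        mb->vlan_tci = rte_le_to_cpu_16(d->l2tag1);   /* octets 10~11 */
        mb->hash.rss = rte_le_to_cpu_32(d->rss_hash); /* octets 12~15 */
    }

The vector code does this for eight descriptors per loop iteration and
derives ol_flags separately from the status_error0 words.
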
 drivers/net/iavf/iavf_rxtx.c          |  24 +-
 drivers/net/iavf/iavf_rxtx.h          |   6 +
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 539 ++++++++++++++++++++++++++
 3 files changed, 563 insertions(+), 6 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 2eac1de2b..8a1292823 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2087,16 +2087,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				    "Using %sVector Scattered Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_scattered_pkts_vec_avx2 :
-					    iavf_recv_scattered_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
+					iavf_recv_scattered_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_scattered_pkts_vec_avx2 :
+					iavf_recv_scattered_pkts_vec;
 		} else {
 			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
 				    use_avx2 ? "avx2 " : "",
 				    dev->data->port_id);
-			dev->rx_pkt_burst = use_avx2 ?
-					    iavf_recv_pkts_vec_avx2 :
-					    iavf_recv_pkts_vec;
+			if (vf->vf_res->vf_cap_flags &
+				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2_flex_rxd :
+					iavf_recv_pkts_vec;
+			else
+				dev->rx_pkt_burst = use_avx2 ?
+					iavf_recv_pkts_vec_avx2 :
+					iavf_recv_pkts_vec;
 		}
 
 		return;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 8246e797f..84ec39132 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -414,9 +414,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				 uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					  struct rte_mbuf **rx_pkts,
+					  uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue,
 					   struct rte_mbuf **rx_pkts,
 					   uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+						    struct rte_mbuf **rx_pkts,
+						    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
 uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 2587083d8..04603e7d4 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -614,6 +614,464 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline uint16_t
+_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts, uint8_t *split_packet)
+{
+#define IAVF_DESCS_PER_LOOP_AVX 8
+
+	const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl;
+
+	const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
+			0, rxq->mbuf_initializer);
+	struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail];
+	volatile union iavf_rx_flex_desc *rxdp =
+		(union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/* constants used in processing loop */
+	const __m256i crc_adjust =
+		_mm256_set_epi16
+			(/* first descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0,          /* ignore pkt_type field */
+			 /* second descriptor */
+			 0, 0, 0,       /* ignore non-length fields */
+			 -rxq->crc_len, /* sub crc on data_len */
+			 0,             /* ignore high-16bits of pkt_len */
+			 -rxq->crc_len, /* sub crc on pkt_len */
+			 0, 0           /* ignore pkt_type field */
+			);
+
+	/* 8 packets DD mask, LSB in each 32-bit value */
+	const __m256i dd_check = _mm256_set1_epi32(1);
+
+	/* 8 packets EOP mask, second-LSB in each 32-bit value */
+	const __m256i eop_check = _mm256_slli_epi32(dd_check,
+			IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+
+	/* mask to shuffle from desc. to mbuf (2 descriptors)*/
+	const __m256i shuf_msk =
+		_mm256_set_epi8
+			(/* first descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 /* second descriptor */
+			 15, 14,
+			 13, 12,	/* octet 12~15, 32 bits rss */
+			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
+			 5, 4,		/* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
+			 5, 4,		/* octet 4~5, 16 bits pkt_len */
+			 0xFF, 0xFF,	/* pkt_type set as unknown */
+			 0xFF, 0xFF	/* pkt_type set as unknown */
+			);
+	/**
+	 * compile-time check the above crc and shuffle layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi
+	 * calls above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Status/Error flag masks */
+	/**
+	 * mask everything except Checksum Reports, RSS indication
+	 * and VLAN indication.
+	 * bit6:4 for IP/L4 checksum errors.
+	 * bit12 is for RSS indication.
+	 * bit13 is for VLAN indication.
+	 */
+	const __m256i flags_mask =
+		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+	/**
+	 * data to be shuffled by the result of the flags mask shifted by 4
+	 * bits.  This gives use the l3_l4 flags.
+	 */
+	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			/* second 128-bits */
+			0, 0, 0, 0, 0, 0, 0, 0,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i cksum_mask =
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+				   PKT_RX_EIP_CKSUM_BAD);
+	/**
+	 * data to be shuffled by result of flag mask, shifted down 12.
+	 * If RSS(bit12)/VLAN(bit13) are set,
+	 * shuffle moves appropriate flags in place.
+	 */
+	const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0,
+			/* end up 128-bits */
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	uint16_t i, received;
+
+	for (i = 0, received = 0; i < nb_pkts;
+	     i += IAVF_DESCS_PER_LOOP_AVX,
+	     rxdp += IAVF_DESCS_PER_LOOP_AVX) {
+		/* step 1, copy over 8 mbuf pointers to rx_pkts array */
+		_mm256_storeu_si256((void *)&rx_pkts[i],
+				    _mm256_loadu_si256((void *)&sw_ring[i]));
+#ifdef RTE_ARCH_X86_64
+		_mm256_storeu_si256
+			((void *)&rx_pkts[i + 4],
+			 _mm256_loadu_si256((void *)&sw_ring[i + 4]));
+#endif
+
+		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
+
+		const __m128i raw_desc7 =
+			_mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 =
+			_mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 =
+			_mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 =
+			_mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 =
+			_mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 =
+			_mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 =
+			_mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 =
+			_mm_load_si128((void *)(rxdp + 0));
+
+		raw_desc6_7 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc6),
+				 raw_desc7, 1);
+		raw_desc4_5 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc4),
+				 raw_desc5, 1);
+		raw_desc2_3 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc2),
+				 raw_desc3, 1);
+		raw_desc0_1 =
+			_mm256_inserti128_si256
+				(_mm256_castsi128_si256(raw_desc0),
+				 raw_desc1, 1);
+
+		if (split_packet) {
+			int j;
+
+			for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++)
+				rte_mbuf_prefetch_part2(rx_pkts[i + j]);
+		}
+
+		/**
+		 * convert descriptors 4-7 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk);
+		__m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk);
+
+		mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust);
+		mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptype_mask =
+			_mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M);
+		const __m256i ptypes6_7 =
+			_mm256_and_si256(raw_desc6_7, ptype_mask);
+		const __m256i ptypes4_5 =
+			_mm256_and_si256(raw_desc4_5, ptype_mask);
+		const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9);
+		const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1);
+		const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9);
+		const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1);
+
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4);
+		mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4);
+		mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0);
+		/* merge the status bits into one register */
+		const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7,
+				raw_desc4_5);
+
+		/**
+		 * convert descriptors 0-3 into mbufs, re-arrange fields.
+		 * Then write into the mbuf.
+		 */
+		__m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk);
+		__m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk);
+
+		mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust);
+		mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust);
+		/**
+		 * to get packet types, ptype is located in bit16-25
+		 * of each 128bits
+		 */
+		const __m256i ptypes2_3 =
+			_mm256_and_si256(raw_desc2_3, ptype_mask);
+		const __m256i ptypes0_1 =
+			_mm256_and_si256(raw_desc0_1, ptype_mask);
+		const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9);
+		const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1);
+		const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9);
+		const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1);
+
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4);
+		mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4);
+		mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0);
+		/* merge the status bits into one register */
+		const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3,
+								raw_desc0_1);
+
+		/**
+		 * take the two sets of status bits and merge to one
+		 * After merge, the packets status flags are in the
+		 * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6]
+		 */
+		__m256i status0_7 = _mm256_unpacklo_epi64(status4_7,
+							  status0_3);
+
+		/* now do flag manipulation */
+
+		/* get only flag/error bits we want */
+		const __m256i flag_bits =
+			_mm256_and_si256(status0_7, flags_mask);
+		/**
+		 * l3_l4_error flags, shuffle, then shift to correct adjustment
+		 * of flags in flags_shuf, and finally mask out extra bits
+		 */
+		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
+				_mm256_srli_epi32(flag_bits, 4));
+		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
+		/* set rss and vlan flags */
+		const __m256i rss_vlan_flag_bits =
+			_mm256_srli_epi32(flag_bits, 12);
+		const __m256i rss_vlan_flags =
+			_mm256_shuffle_epi8(rss_vlan_flags_shuf,
+					    rss_vlan_flag_bits);
+
+		/* merge flags */
+		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+				rss_vlan_flags);
+		/**
+		 * At this point, we have the 8 sets of flags in the low 16-bits
+		 * of each 32-bit value in mbuf_flags.
+		 * We want to extract these, and merge them with the mbuf init
+		 * data so we can do a single write to the mbuf to set the flags
+		 * and all the other initialization fields. Extracting the
+		 * appropriate flags means that we have to do a shift and blend
+		 * for each mbuf before we do the write. However, we can also
+		 * add in the previously computed rx_descriptor fields to
+		 * make a single 256-bit write per mbuf
+		 */
+		/* check the structure matches expectations */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+				 offsetof(struct rte_mbuf, rearm_data) + 8);
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+				 RTE_ALIGN(offsetof(struct rte_mbuf,
+						    rearm_data),
+					   16));
+		/* build up data and do writes */
+		__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5,
+			rearm6, rearm7;
+		rearm6 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 8),
+					    0x04);
+		rearm4 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(mbuf_flags, 4),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04);
+		rearm0 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(mbuf_flags, 4),
+					    0x04);
+		/* permute to add in the rx_descriptor e.g. rss fields */
+		rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20);
+		rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20);
+		rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20);
+		rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20);
+		/* write to mbuf */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+
+		/* repeat for the odd mbufs */
+		const __m256i odd_flags =
+			_mm256_castsi128_si256
+				(_mm256_extracti128_si256(mbuf_flags, 1));
+		rearm7 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 8),
+					    0x04);
+		rearm5 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_slli_si256(odd_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04);
+		rearm1 = _mm256_blend_epi32(mbuf_init,
+					    _mm256_srli_si256(odd_flags, 4),
+					    0x04);
+		/* since odd mbufs are already in hi 128-bits use blend */
+		rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0);
+		rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0);
+		rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0);
+		rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0);
+		/* again write to mbufs */
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+		_mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+
+		/* extract and record EOP bit */
+		if (split_packet) {
+			const __m128i eop_mask =
+				_mm_set1_epi16(1 <<
+					       IAVF_RX_FLEX_DESC_STATUS0_EOF_S);
+			const __m256i eop_bits256 = _mm256_and_si256(status0_7,
+								     eop_check);
+			/* pack status bits into a single 128-bit register */
+			const __m128i eop_bits =
+				_mm_packus_epi32
+					(_mm256_castsi256_si128(eop_bits256),
+					 _mm256_extractf128_si256(eop_bits256,
+								  1));
+			/**
+			 * flip bits, and mask out the EOP bit, which is now
+			 * a split-packet bit i.e. !EOP, rather than EOP one.
+			 */
+			__m128i split_bits = _mm_andnot_si128(eop_bits,
+					eop_mask);
+			/**
+			 * eop bits are out of order, so we need to shuffle them
+			 * back into order again. In doing so, only use low 8
+			 * bits, which acts like another pack instruction
+			 * The original order is (hi->lo): 1,3,5,7,0,2,4,6
+			 * [Since we use epi8, the 16-bit positions are
+			 * multiplied by 2 in the eop_shuffle value.]
+			 */
+			__m128i eop_shuffle =
+				_mm_set_epi8(/* zero hi 64b */
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     0xFF, 0xFF, 0xFF, 0xFF,
+					     /* move values to lo 64b */
+					     8, 0, 10, 2,
+					     12, 4, 14, 6);
+			split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle);
+			*(uint64_t *)split_packet =
+				_mm_cvtsi128_si64(split_bits);
+			split_packet += IAVF_DESCS_PER_LOOP_AVX;
+		}
+
+		/* perform dd_check */
+		status0_7 = _mm256_and_si256(status0_7, dd_check);
+		status0_7 = _mm256_packs_epi32(status0_7,
+					       _mm256_setzero_si256());
+
+		uint64_t burst = __builtin_popcountll
+					(_mm_cvtsi128_si64
+						(_mm256_extracti128_si256
+							(status0_7, 1)));
+		burst += __builtin_popcountll
+				(_mm_cvtsi128_si64
+					(_mm256_castsi256_si128(status0_7)));
+		received += burst;
+		if (burst != IAVF_DESCS_PER_LOOP_AVX)
+			break;
+	}
+
+	/* update tail pointers */
+	rxq->rx_tail += received;
+	rxq->rx_tail &= (rxq->nb_rx_desc - 1);
+	if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */
+		rxq->rx_tail--;
+		received--;
+	}
+	rxq->rxrearm_nb += received;
+	return received;
+}
+
 /**
  * Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
@@ -625,6 +1083,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/**
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				 uint16_t nb_pkts)
+{
+	return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts,
+						     nb_pkts, NULL);
+}
+
 /**
  * vPMD receive routine that reassembles single burst of 32 scattered packets
  * Notice:
@@ -690,6 +1160,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
 				rx_pkts + retval, nb_pkts);
 }
 
+/**
+ * vPMD receive routine that reassembles single burst of
+ * 32 scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+static uint16_t
+iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue,
+					    struct rte_mbuf **rx_pkts,
+					    uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq,
+					rx_pkts, nb_pkts, split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned int i = 0;
+
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble then */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+					     &split_flags[i]);
+}
+
+/**
+ * vPMD receive routine that reassembles scattered packets for flex RxD.
+ * Main receive routine that can handle arbitrary burst sizes
+ * Notice:
+ * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue,
+					   struct rte_mbuf **rx_pkts,
+					   uint16_t nb_pkts)
+{
+	uint16_t retval = 0;
+
+	while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) {
+		uint16_t burst =
+			iavf_recv_scattered_burst_vec_avx2_flex_rxd
+			(rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST);
+		retval += burst;
+		nb_pkts -= burst;
+		if (burst < IAVF_VPMD_RX_MAX_BURST)
+			return retval;
+	}
+	return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue,
+				rx_pkts + retval, nb_pkts);
+}
+
 static inline void
 iavf_vtx1(volatile struct iavf_tx_desc *txdp,
 	  struct rte_mbuf *pkt, uint64_t flags)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 05/11] net/iavf: flexible Rx descriptor support in SSE path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (3 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 06/11] net/iavf: add flow director enabled switch value Leyi Rong
                     ` (6 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support flexible Rx descriptor format in SSE
path of iAVF PMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
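This also stops the non-AVX2 flex case from falling back to the legacy
iavf_recv_pkts_vec(), which parses the legacy descriptor layout. The
resulting dispatch, condensed (use_flex_rxd is shorthand for the
VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC check; the scattered case is
symmetric):

    if (use_flex_rxd)
        dev->rx_pkt_burst = use_avx2 ?
            iavf_recv_pkts_vec_avx2_flex_rxd :
            iavf_recv_pkts_vec_flex_rxd;
    else
        dev->rx_pkt_burst = use_avx2 ?
            iavf_recv_pkts_vec_avx2 :
            iavf_recv_pkts_vec;
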
 drivers/net/iavf/iavf_rxtx.c         |   4 +-
 drivers/net/iavf/iavf_rxtx.h         |   5 +
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++
 3 files changed, 421 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 8a1292823..92bf4cf8b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2091,7 +2091,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2_flex_rxd :
-					iavf_recv_scattered_pkts_vec;
+					iavf_recv_scattered_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_scattered_pkts_vec_avx2 :
@@ -2104,7 +2104,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 				VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2_flex_rxd :
-					iavf_recv_pkts_vec;
+					iavf_recv_pkts_vec_flex_rxd;
 			else
 				dev->rx_pkt_burst = use_avx2 ?
 					iavf_recv_pkts_vec_avx2 :
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 84ec39132..9bc857312 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -407,9 +407,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset);
 
 uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			   uint16_t nb_pkts);
+uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+				     uint16_t nb_pkts);
 uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
 				     struct rte_mbuf **rx_pkts,
 				     uint16_t nb_pkts);
+uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+					       struct rte_mbuf **rx_pkts,
+					       uint16_t nb_pkts);
 uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 				  uint16_t nb_pkts);
 uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 0365c49e1..9c1f2a445 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline void
+flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
+		       struct rte_mbuf **rx_pkts)
+{
+	const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
+	__m128i rearm0, rearm1, rearm2, rearm3;
+
+	__m128i tmp_desc, flags, rss_vlan;
+
+	/* mask everything except checksum, RSS and VLAN flags.
+	 * bit6:4 for checksum.
+	 * bit12 for RSS indication.
+	 * bit13 for VLAN indication.
+	 */
+	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
+						0x3070, 0x3070);
+
+	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD,
+						 PKT_RX_IP_CKSUM_MASK |
+						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_EIP_CKSUM_BAD);
+
+	/* map the checksum, rss and vlan fields to the checksum, rss
+	 * and vlan flag
+	 */
+	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+			/* shift right 1 bit to make sure it does not exceed 255 */
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+			 PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+
+	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
+			0, 0, 0, 0,
+			0, 0, 0, 0,
+			PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
+			PKT_RX_RSS_HASH, 0);
+
+	/* merge 4 descriptors */
+	flags = _mm_unpackhi_epi32(descs[0], descs[1]);
+	tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]);
+	tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc);
+	tmp_desc = _mm_and_si128(tmp_desc, desc_mask);
+
+	/* checksum flags */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 4);
+	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
+	/* then we shift left 1 bit */
+	flags = _mm_slli_epi32(flags, 1);
+	/* we need to mask out the redundant bits introduced by RSS or
+	 * VLAN fields.
+	 */
+	flags = _mm_and_si128(flags, cksum_mask);
+
+	/* RSS, VLAN flag */
+	tmp_desc = _mm_srli_epi32(tmp_desc, 8);
+	rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc);
+
+	/* merge the flags */
+	flags = _mm_or_si128(flags, rss_vlan);
+
+	/**
+	 * At this point, we have the 4 sets of flags in the low 16-bits
+	 * of each 32-bit value in flags.
+	 * We want to extract these, and merge them with the mbuf init data
+	 * so we can do a single 16-byte write to the mbuf to set the flags
+	 * and all the other initialization fields. Extracting the
+	 * appropriate flags means that we have to do a shift and blend for
+	 * each mbuf before we do the write.
+	 */
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+
+	/* write the rearm data and the olflags in one write */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_mbuf, rearm_data) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
+			 RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
+}
+
 #define PKTLEN_SHIFT     10
 
 static inline void
@@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
 	rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)];
 }
 
+static inline void
+flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
+		     const uint32_t *type_table)
+{
+	const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M,
+						 0, IAVF_RX_FLEX_DESC_PTYPE_M);
+	__m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]);
+	__m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]);
+	__m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23);
+
+	ptype_all = _mm_and_si128(ptype_all, ptype_mask);
+
+	rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)];
+	rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)];
+	rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)];
+	rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)];
+}
+
 /* Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	return nb_pkts_recd;
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+static inline uint16_t
+_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
+			    struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts, uint8_t *split_packet)
+{
+	volatile union iavf_rx_flex_desc *rxdp;
+	struct rte_mbuf **sw_ring;
+	uint16_t nb_pkts_recd;
+	int pos;
+	uint64_t var;
+	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	__m128i crc_adjust = _mm_set_epi16
+				(0, 0, 0,       /* ignore non-length fields */
+				 -rxq->crc_len, /* sub crc on data_len */
+				 0,          /* ignore high-16bits of pkt_len */
+				 -rxq->crc_len, /* sub crc on pkt_len */
+				 0, 0           /* ignore pkt_type field */
+				);
+	const __m128i zero = _mm_setzero_si128();
+	/* mask to shuffle from desc. to mbuf */
+	const __m128i shuf_msk = _mm_set_epi8
+			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
+			 5, 4,        /* octet 4~5, 16 bits data_len */
+			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
+			 5, 4,        /* octet 4~5, low 16 bits pkt_len */
+			 0xFF, 0xFF,  /* pkt_type set as unknown */
+			 0xFF, 0xFF   /* pkt_type set as unknown */
+			);
+	const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0xFF, 0xFF,
+						   0x04, 0x0C,
+						   0x00, 0x08);
+
+	/**
+	 * compile-time check the above crc_adjust layout is correct.
+	 * NOTE: the first field (lowest address) is given last in set_epi16
+	 * call above.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+
+	/* 4 packets DD mask */
+	const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL,
+						0x0000000100000001LL);
+	/* 4 packets EOP mask */
+	const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
+						 0x0000000200000002LL);
+
+	/* nb_pkts has to be less than or equal to IAVF_VPMD_RX_MAX_BURST */
+	nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST);
+
+	/* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP);
+
+	/* Just the act of getting into the function from the application is
+	 * going to cost about 7 cycles
+	 */
+	rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail;
+
+	rte_prefetch0(rxdp);
+
+	/* See if we need to rearm the RX queue - gives the prefetch a bit
+	 * of time to act
+	 */
+	if (rxq->rxrearm_nb > rxq->rx_free_thresh)
+		iavf_rxq_rearm(rxq);
+
+	/* Before we start moving massive data around, check to see if
+	 * there is actually a packet available
+	 */
+	if (!(rxdp->wb.status_error0 &
+	      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
+		return 0;
+
+	/**
+	 * Compile-time verify the shuffle mask
+	 * NOTE: some field positions already verified above, but duplicated
+	 * here for completeness in case of future modifications.
+	 */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
+			 offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+	/* Cache is empty -> need to scan the buffer rings, but first move
+	 * the next 'n' mbufs into the cache
+	 */
+	sw_ring = &rxq->sw_ring[rxq->rx_tail];
+
+	/* A. load 4 packets in one loop
+	 * [A*. mask out 4 unused dirty fields in desc]
+	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
+	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
+	 * D. fill info. from desc to mbuf
+	 */
+
+	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
+	     pos += IAVF_VPMD_DESCS_PER_LOOP,
+	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
+		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
+		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i staterr, sterr_tmp1, sterr_tmp2;
+		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
+		__m128i mbp1;
+#if defined(RTE_ARCH_X86_64)
+		__m128i mbp2;
+#endif
+
+		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */
+		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+		/* Read desc statuses backwards to avoid race condition */
+		/* A.1 load 4 pkts desc */
+		descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+		rte_compiler_barrier();
+
+		/* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.1 load 2 64 bit mbuf pointers */
+		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+#endif
+
+		descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+		rte_compiler_barrier();
+		/* A.1 load desc[1] */
+		descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+		rte_compiler_barrier();
+		descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+#if defined(RTE_ARCH_X86_64)
+		/* B.2 copy 2 mbuf pointers into rx_pkts */
+		_mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
+#endif
+
+		if (split_packet) {
+			rte_mbuf_prefetch_part2(rx_pkts[pos]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
+		}
+
+		/* avoid compiler reorder optimization */
+		rte_compiler_barrier();
+
+		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
+		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
+		/* C.1 4=>2 filter staterr info only */
+		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
+
+		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
+
+		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
+		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
+		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		/* D.3 copy final 3,4 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
+			 pkt_mb4);
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
+			 pkt_mb3);
+
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit
+			 */
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
+		}
+
+		/* C.3 calc available number of desc */
+		staterr = _mm_and_si128(staterr, dd_check);
+		staterr = _mm_packs_epi32(staterr, zero);
+
+		/* D.3 copy final 1,2 data to rx_pkts */
+		_mm_storeu_si128
+			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
+			 pkt_mb2);
+		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
+				 pkt_mb1);
+		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
+		/* C.4 calc available number of desc */
+		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
+		nb_pkts_recd += var;
+		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
+			break;
+	}
+
+	/* Update our internal tail pointer */
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
+	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
+
+	return nb_pkts_recd;
+}
+
 /* Notice:
  * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
@@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
 }
 
+/* Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
+			    uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
 /* vPMD receive routine that reassembles scattered packets
  * Notice:
  * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
@@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		&split_flags[i]);
 }
 
+/* vPMD receive routine that reassembles scattered packets for flex RxD
+ * Notice:
+ * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
+ * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST
+ *   numbers of DD bits
+ */
+uint16_t
+iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
+				      struct rte_mbuf **rx_pkts,
+				      uint16_t nb_pkts)
+{
+	struct iavf_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0};
+	unsigned int i = 0;
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
+					      split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
+
+	if (!rxq->pkt_first_seg &&
+	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
+	    split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	if (!rxq->pkt_first_seg) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+		rxq->pkt_first_seg = rx_pkts[i];
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags)
 {
-- 
2.17.1
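
For readers decoding the SIMD above: the cksum_flags table in
flex_desc_to_olflags_v() is a byte LUT indexed by the three checksum
error bits of status_error0 (bits 6:4, per the desc_mask comment), with
entries pre-shifted right by one because PKT_RX_L4_CKSUM_GOOD does not
fit in 8 bits. Below is a minimal scalar sketch of the same lookup; the
helper name is hypothetical and not part of the patch:

#include <rte_mbuf.h>

/* Scalar equivalent of the cksum_flags shuffle: index with the three
 * checksum bits (IP err, L4 err, outer-IP err) of status_error0.
 * LUT entries are stored >> 1 so they fit in a byte, then shifted
 * back, mirroring the vector code's byte shuffle plus slli by 1.
 */
static inline uint64_t
scalar_flex_cksum_flags(uint16_t status_error0)
{
	static const uint8_t lut[8] = {
		(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
		(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
		(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
		(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
		(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
		 PKT_RX_IP_CKSUM_GOOD) >> 1,
		(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
		 PKT_RX_IP_CKSUM_BAD) >> 1,
		(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
		 PKT_RX_IP_CKSUM_GOOD) >> 1,
		(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
		 PKT_RX_IP_CKSUM_BAD) >> 1,
	};

	return (uint64_t)lut[(status_error0 >> 4) & 0x7] << 1;
}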


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 06/11] net/iavf: add flow director enabled switch value
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (4 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 07/11] net/iavf: support flow mark in normal data path Leyi Rong
                     ` (5 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

This commit adds an fdir_enabled flag to the iavf_rx_queue structure
to identify whether FDIR ID parsing is active. The Rx data path
benefits when FDIR ID parsing can be skipped, especially in the
vector paths.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  1 +
 drivers/net/iavf/iavf_rxtx.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index fdb31b929..17ceff734 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -144,6 +144,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	const uint32_t *ptype_tbl;
 	bool stopped;
+	uint16_t fdir_ref_cnt;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 9bc857312..73968847f 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -104,6 +104,7 @@ struct iavf_rx_queue {
 
 	uint16_t port_id;        /* device port ID */
 	uint8_t crc_len;        /* 0 if CRC stripped, 4 otherwise */
+	uint8_t fdir_enabled;   /* 0 if FDIR disabled, 1 when enabled */
 	uint16_t queue_id;      /* Rx queue index */
 	uint16_t rx_buf_len;    /* The packet buffer size */
 	uint16_t rx_hdr_len;    /* The header buffer size */
@@ -491,6 +492,35 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq,
 	       tx_desc->cmd_type_offset_bsz);
 }
 
+#define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \
+	int i; \
+	for (i = 0; i < (ad)->eth_dev->data->nb_rx_queues; i++) { \
+		struct iavf_rx_queue *rxq = (ad)->eth_dev->data->rx_queues[i]; \
+		if (!rxq) \
+			continue; \
+		rxq->fdir_enabled = on; \
+	} \
+	PMD_DRV_LOG(DEBUG, "FDIR processing on RX set to %d", on); \
+} while (0)
+
+/* Enable/disable flow director Rx processing in data path. */
+static inline
+void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on)
+{
+	if (on) {
+		/* enable flow director processing */
+		if (ad->fdir_ref_cnt++ == 0)
+			FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+	} else {
+		if (ad->fdir_ref_cnt >= 1) {
+			ad->fdir_ref_cnt--;
+
+			if (ad->fdir_ref_cnt == 0)
+				FDIR_PROC_ENABLE_PER_QUEUE(ad, on);
+		}
+	}
+}
+
 #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC
 #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \
 	iavf_dump_rx_descriptor(rxq, desc, rx_id)
-- 
2.17.1
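
The reference count makes enable and disable calls nest: the per-queue
fdir_enabled flag only flips on the 0 -> 1 and 1 -> 0 transitions of
fdir_ref_cnt. A hedged sketch of how rule create/destroy paths would
drive it, assuming the iavf headers from this patch; the function names
below are illustrative, not from this series:

#include <stdbool.h>

/* Illustrative only: each installed FDIR rule holds one reference, so
 * Rx FDIR ID parsing stays on while at least one rule exists and is
 * switched off again together with the last rule.
 */
static int
example_fdir_rule_create(struct iavf_adapter *ad)
{
	/* ... program the filter via virtchnl (omitted) ... */
	iavf_fdir_rx_proc_enable(ad, true);	/* fdir_ref_cnt++ */
	return 0;
}

static void
example_fdir_rule_destroy(struct iavf_adapter *ad)
{
	/* ... remove the filter (omitted) ... */
	iavf_fdir_rx_proc_enable(ad, false);	/* fdir_ref_cnt-- */
}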


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 07/11] net/iavf: support flow mark in normal data path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (5 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 06/11] net/iavf: add flow director enabled switch value Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 08/11] net/iavf: support flow mark in AVX path Leyi Rong
                     ` (4 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing in normal path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf.h      |  3 +++
 drivers/net/iavf/iavf_rxtx.c | 51 +++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 17ceff734..78bdaff20 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -67,6 +67,9 @@
 #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define IAVF_48_BIT_MASK  RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t)
 
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
+#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 92bf4cf8b..42b5f398e 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -756,6 +756,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 					IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
 			IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 
+	/* Check if FDIR Match */
+	flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ?
+				PKT_RX_FDIR : 0);
+
 	if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
 		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 		return flags;
@@ -776,7 +780,31 @@ iavf_rxd_to_pkt_flags(uint64_t qword)
 	return flags;
 }
 
+static inline uint64_t
+iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb)
+{
+	uint64_t flags = 0;
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	uint16_t flexbh;
+
+	flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
+		IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK;
+
+	if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
+		mb->hash.fdir.hi =
+			rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
+		flags |= PKT_RX_FDIR_ID;
+	}
+#else
+	mb->hash.fdir.hi =
+		rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd_id);
+	flags |= PKT_RX_FDIR_ID;
+#endif
+	return flags;
+}
+
+
 /* Translate the rx flex descriptor status to pkt flags */
 static inline void
 iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
@@ -784,6 +812,7 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 {
 	volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc =
 			(volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 	uint16_t stat_err;
 
 	stat_err = rte_le_to_cpu_16(desc->status_error0);
@@ -791,9 +820,14 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb,
 		mb->ol_flags |= PKT_RX_RSS_HASH;
 		mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
 	}
-}
 #endif
 
+	if (desc->flow_id != 0xFFFFFFFF) {
+		mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+		mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
+	}
+}
+
 #define IAVF_RX_FLEX_ERR0_BITS	\
 	((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) |	\
 	 (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
@@ -951,6 +985,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 			rxm->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm);
+
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1047,9 +1084,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
 		rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(rxm, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		iavf_rxd_to_pkt_fields(rxm, &rxd);
-#endif
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 		rxm->ol_flags |= pkt_flags;
 
@@ -1191,9 +1226,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts,
 		first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 			rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)];
 		iavf_flex_rxd_to_vlan_tci(first_seg, &rxd);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		iavf_rxd_to_pkt_fields(first_seg, &rxd);
-#endif
 		pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
 		first_seg->ol_flags |= pkt_flags;
@@ -1353,6 +1386,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			first_seg->hash.rss =
 				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 
+		if (pkt_flags & PKT_RX_FDIR)
+			pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg);
+
 		first_seg->ol_flags |= pkt_flags;
 
 		/* Prefetch data of first segment, if configured to do so. */
@@ -1428,9 +1464,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
 			mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M &
 				rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)];
 			iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]);
-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 			iavf_rxd_to_pkt_fields(mb, &rxdp[j]);
-#endif
 			stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0);
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
 
@@ -1521,6 +1555,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				mb->hash.rss = rte_le_to_cpu_32(
 					rxdp[j].wb.qword0.hi_dword.rss);
 
+			if (pkt_flags & PKT_RX_FDIR)
+				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
+
 			mb->ol_flags |= pkt_flags;
 		}
 
-- 
2.17.1
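
From the application's point of view, the mark arrives through standard
mbuf fields no matter which Rx path ran. A minimal consumer sketch,
assuming a flow rule with a MARK action has already been installed;
port/queue ids and the burst size are arbitrary:

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
example_poll_flow_marks(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
	for (i = 0; i < nb; i++) {
		struct rte_mbuf *m = pkts[i];

		/* PKT_RX_FDIR_ID means hash.fdir.hi holds a valid mark */
		if (m->ol_flags & PKT_RX_FDIR_ID)
			printf("pkt %u: flow mark %u\n", i, m->hash.fdir.hi);
		rte_pktmbuf_free(m);
	}
}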


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 08/11] net/iavf: support flow mark in AVX path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (6 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 07/11] net/iavf: support flow mark in normal data path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 09/11] net/iavf: support flow mark in SSE path Leyi Rong
                     ` (3 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in AVX path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 72 +++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 04603e7d4..9f467d4fc 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -614,6 +614,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 	return received;
 }
 
+static inline __m256i
+flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC);
+	__m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7,
+			fdir_mis_mask);
+	/* this XOR op inverts the fdir_mask (bitwise NOT) */
+	fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask);
+	const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline uint16_t
 _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				      struct rte_mbuf **rx_pkts,
@@ -675,8 +694,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m256i shuf_msk =
 		_mm256_set_epi8
 			(/* first descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -684,8 +703,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/* pkt_type set as unknown */
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
-			 15, 14,
-			 13, 12,	/* octet 12~15, 32 bits rss */
+			 0xFF, 0xFF,
+			 0xFF, 0xFF,    /* rss not supported */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -927,8 +946,51 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 					    rss_vlan_flag_bits);
 
 		/* merge flags */
-		const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
+		__m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
 				rss_vlan_flags);
+
+		if (rxq->fdir_enabled) {
+			const __m256i fdir_id4_7 =
+				_mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5);
+
+			const __m256i fdir_id0_3 =
+				_mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1);
+
+			const __m256i fdir_id0_7 =
+				_mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3);
+
+			const __m256i fdir_flags =
+				flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7);
+
+			/* merge with fdir_flags */
+			mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags);
+
+			/* write to mbuf: have to use scalar store here */
+			rx_pkts[i + 0]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 3);
+
+			rx_pkts[i + 1]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 7);
+
+			rx_pkts[i + 2]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 2);
+
+			rx_pkts[i + 3]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 6);
+
+			rx_pkts[i + 4]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 1);
+
+			rx_pkts[i + 5]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 5);
+
+			rx_pkts[i + 6]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 0);
+
+			rx_pkts[i + 7]->hash.fdir.hi =
+				_mm256_extract_epi32(fdir_id0_7, 4);
+		} /* if() on fdir_enabled */
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1
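
The cmpeq/xor/and sequence above is a branch-free way of computing
"flags if matched, else 0". For a single descriptor the equivalent
scalar logic is the following sketch (hypothetical helper, shown only
to explain the identity):

#include <rte_mbuf.h>

/* Scalar equivalent of flex_rxd_to_fdir_flags_vec_avx2() for one
 * 32-bit flow_id: cmpeq yields all-ones for the 0xFFFFFFFF miss
 * magic, the XOR inverts that so hits become all-ones, and the AND
 * keeps the FDIR flag bits only for hits.
 */
static inline uint32_t
scalar_fdir_flags(uint32_t flow_id)
{
	uint32_t miss_mask = (flow_id == 0xFFFFFFFF) ? 0xFFFFFFFF : 0;
	uint32_t hit_mask = miss_mask ^ 0xFFFFFFFF;	/* invert */

	return hit_mask & (PKT_RX_FDIR | PKT_RX_FDIR_ID);
}

The scalar stores that follow in the patch extract lanes in the order
3, 7, 2, 6, 1, 5, 0, 4 because the two epi32/epi64 unpacks leave the
flow IDs interleaved across the 128-bit halves of the ymm register.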


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 09/11] net/iavf: support flow mark in SSE path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (7 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 08/11] net/iavf: support flow mark in AVX path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
                     ` (2 subsequent siblings)
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support Flow Director mark ID parsing from Flex
Rx descriptor in SSE path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 52 +++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 9c1f2a445..d7d840853 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
 }
 
+static inline __m128i
+flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3)
+{
+#define FDID_MIS_MAGIC 0xFFFFFFFF
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2));
+	RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13));
+	const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR |
+			PKT_RX_FDIR_ID);
+	/* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */
+	const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC);
+	__m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3,
+			fdir_mis_mask);
+	/* this XOR op inverts the fdir_mask (bitwise NOT) */
+	fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask);
+	const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit);
+
+	return fdir_flags;
+}
+
 static inline void
 flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 		       struct rte_mbuf **rx_pkts)
@@ -267,6 +286,36 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 	/* merge the flags */
 	flags = _mm_or_si128(flags, rss_vlan);
 
+	if (rxq->fdir_enabled) {
+		const __m128i fdir_id0_1 =
+			_mm_unpackhi_epi32(descs[0], descs[1]);
+
+		const __m128i fdir_id2_3 =
+			_mm_unpackhi_epi32(descs[2], descs[3]);
+
+		const __m128i fdir_id0_3 =
+			_mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3);
+
+		const __m128i fdir_flags =
+			flex_rxd_to_fdir_flags_vec(fdir_id0_3);
+
+		/* merge with fdir_flags */
+		flags = _mm_or_si128(flags, fdir_flags);
+
+		/* write fdir_id to mbuf */
+		rx_pkts[0]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 0);
+
+		rx_pkts[1]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 1);
+
+		rx_pkts[2]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 2);
+
+		rx_pkts[3]->hash.fdir.hi =
+			_mm_extract_epi32(fdir_id0_3, 3);
+	} /* if() on fdir_enabled */
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -604,7 +653,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	const __m128i zero = _mm_setzero_si128();
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
-			(15, 14, 13, 12,  /* octet 12~15, 32 bits rss */
+			(0xFF, 0xFF,
+			 0xFF, 0xFF,  /* rss not supported */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 10/11] net/iavf: add RSS hash parsing in AVX path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (8 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 09/11] net/iavf: support flow mark in SSE path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
  2020-04-21  3:14   ` [dpdk-dev] [PATCH v6 00/11] framework for advanced iAVF PMD Ye Xiaolong
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in AVX data path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 94 ++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index 9f467d4fc..e5e0fd309 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -695,7 +695,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 		_mm256_set_epi8
 			(/* first descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -704,7 +704,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 			 0xFF, 0xFF,	/*pkt_type set as unknown */
 			 /* second descriptor */
 			 0xFF, 0xFF,
-			 0xFF, 0xFF,    /* rss not supported */
+			 0xFF, 0xFF,    /* rss hash parsed separately */
 			 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 			 5, 4,		/* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,	/* skip hi 16 bits pkt_len, zero out */
@@ -991,6 +991,96 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		/**
+		 * needs to load the 2nd 16B of each desc for RSS hash
+		 * parsing; taking this branch costs extra cycles per burst.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh7 =
+				_mm_load_si128
+					((void *)(&rxdp[7].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh6 =
+				_mm_load_si128
+					((void *)(&rxdp[6].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh5 =
+				_mm_load_si128
+					((void *)(&rxdp[5].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh4 =
+				_mm_load_si128
+					((void *)(&rxdp[4].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			__m256i raw_desc_bh6_7 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh6),
+					raw_desc_bh7, 1);
+			__m256i raw_desc_bh4_5 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh4),
+					raw_desc_bh5, 1);
+			__m256i raw_desc_bh2_3 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh2),
+					raw_desc_bh3, 1);
+			__m256i raw_desc_bh0_1 =
+				_mm256_inserti128_si256
+					(_mm256_castsi128_si256(raw_desc_bh0),
+					raw_desc_bh1, 1);
+
+			/**
+			 * shift the 32b RSS hash value into the
+			 * highest 32b of each 128b lane before masking
+			 */
+			__m256i rss_hash6_7 =
+				_mm256_slli_epi64(raw_desc_bh6_7, 32);
+			__m256i rss_hash4_5 =
+				_mm256_slli_epi64(raw_desc_bh4_5, 32);
+			__m256i rss_hash2_3 =
+				_mm256_slli_epi64(raw_desc_bh2_3, 32);
+			__m256i rss_hash0_1 =
+				_mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+			__m256i rss_hash_msk =
+				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+						 0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash6_7 = _mm256_and_si256
+					(rss_hash6_7, rss_hash_msk);
+			rss_hash4_5 = _mm256_and_si256
+					(rss_hash4_5, rss_hash_msk);
+			rss_hash2_3 = _mm256_and_si256
+					(rss_hash2_3, rss_hash_msk);
+			rss_hash0_1 = _mm256_and_si256
+					(rss_hash0_1, rss_hash_msk);
+
+			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+		} /* if() on RSS hash parsing */
+#endif
+
 		/**
 		 * At this point, we have the 8 sets of flags in the low 16-bits
 		 * of each 32-bit value in vlan0.
-- 
2.17.1
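
The slli-by-32 plus mask is pure repositioning. Assuming the comms_ovs
flex descriptor profile used by this series, the RSS hash is the low
dword of qword 3, i.e. the low dword of the upper quadword of the 16B
loaded from status_error1 onward, while mbuf->hash.rss sits at byte
offset 12..15 of the 16B rx_descriptor_fields1 write. A scalar sketch
of the move (helper name hypothetical):

#include <stdint.h>

/* Scalar view of one descriptor's qword 3 (bytes 24..31 of the 32B
 * flex descriptor): rss_hash is its low dword. Shifting the quadword
 * left by 32 parks the hash in the high dword, which is exactly the
 * hash.rss slot of the 16B mbuf field block, so the vector path can
 * OR it into the pkt_mb register after masking everything else off.
 */
static inline uint64_t
example_position_rss_hash(uint64_t desc_qword3)
{
	uint64_t shifted = desc_qword3 << 32;

	return shifted & 0xFFFFFFFF00000000ULL; /* hash in top 32 bits */
}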


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* [dpdk-dev] [PATCH v6 11/11] net/iavf: add RSS hash parsing in SSE path
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (9 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
@ 2020-04-20  6:16   ` Leyi Rong
  2020-04-21  3:14   ` [dpdk-dev] [PATCH v6 00/11] framework for advanced iAVF PMD Ye Xiaolong
  11 siblings, 0 replies; 80+ messages in thread
From: Leyi Rong @ 2020-04-20  6:16 UTC (permalink / raw)
  To: jingjing.wu, qi.z.zhang, beilei.xing, xiaolong.ye; +Cc: dev, Leyi Rong

Support RSS hash parsing from Flex Rx
descriptor in SSE data path.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 88 +++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index d7d840853..2f115aa94 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -654,7 +654,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 			(0xFF, 0xFF,
-			 0xFF, 0xFF,  /* rss not supported */
+			 0xFF, 0xFF,  /* rss hash parsed separately */
 			 11, 10,      /* octet 10~11, 16 bits vlan_macip */
 			 5, 4,        /* octet 4~5, 16 bits data_len */
 			 0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
@@ -745,7 +745,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -791,8 +791,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -802,12 +806,68 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+		/**
+		 * needs to load the 2nd 16B of each desc for RSS hash
+		 * parsing; taking this branch costs extra cycles per burst.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * shift the 32b RSS hash value into the
+			 * highest 32b of each 128b lane before masking
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
+#endif
 
 		/* C.2 get 4 pkts staterr value  */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -815,14 +875,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -846,9 +902,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
-- 
2.17.1
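
Both the AVX and SSE branches are gated on DEV_RX_OFFLOAD_RSS_HASH, so
applications that never ask for the hash do not pay for the extra
descriptor loads. A configuration sketch for opting in; the queue
counts and RSS hash function selection are illustrative:

#include <string.h>
#include <rte_ethdev.h>

static int
example_configure_with_rss_hash(uint16_t port_id)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
	conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP;
	/* deliver the hash in mbuf->hash.rss; without this flag the
	 * vector routines skip the 2nd 16B descriptor load entirely
	 */
	conf.rxmode.offloads = DEV_RX_OFFLOAD_RSS_HASH;

	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}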


^ permalink raw reply related	[flat|nested] 80+ messages in thread

* Re: [dpdk-dev] [PATCH v6 00/11] framework for advanced iAVF PMD
  2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
                     ` (10 preceding siblings ...)
  2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
@ 2020-04-21  3:14   ` Ye Xiaolong
  11 siblings, 0 replies; 80+ messages in thread
From: Ye Xiaolong @ 2020-04-21  3:14 UTC (permalink / raw)
  To: Leyi Rong; +Cc: jingjing.wu, qi.z.zhang, beilei.xing, dev

On 04/20, Leyi Rong wrote:
>This patchset enables the framework for advanced iAVF, including
>flexible descriptor support, FDIR mark ID and RSS hash support.
>
>---
>v6:
>- Remove some redundant code which may cause ICC build error.
>
>v5:
>- Set rxdid = 0 in iavf_configure_queues() when it's supported and
>  RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y. So the error can be captured
>  explicitly as kernel PF does not support it yet.
>- Fix some build errors when RTE_LIBRTE_IAVF_16BYTE_RX_DESC is y.
>
>v4:
>- Avoid initial declaration in 'for' loop to backward-compatible
>  with older gcc versions.
>
>v3:
>- Remove patch to query DDP package info as it's specific to DCF.
>- Specify fdir_ref_cnt as per VF value for enable/disable FDIR ID parse.
>- Move fdir_enabled flag to per Queue value for cache benefit when
>  access in vector routines.
>- Store extracted flow_id value to mbuf without judgement to avoid
>  branch speculation fail, which is benefit to performance.
>
>v2:
>- Revert macro RTE_LIBRTE_IAVF_16BYTE_RX_DESC deletion as
>  it's defined in AVF spec.
>- Typo RTE_LIBRTE_ICE_16BYTE_RX_DESC fix in iavf_rxtx.c.
>- Move flex desc definitions into iavf_rxtx.h.
>- Up to date to match with the latest version of virtchnl.h.
>- Extract a new internal func iavf_update_rx_tail to call.
>- Remove
>  iavf_dev_rxq_count_flex_rxd()/iavf_dev_rx_desc_status_flex_rxd(),
>  as the accompanying legacy ones can deal with the flex desc cases.
>- Move rxq->rxdid assignment from iavf_configure_queues()
>  to iavf_dev_rx_queue_setup().
>- Unfold _mm_extract_epi32(fdir_id0_3, i) to fix build error
>  when using GCC compile option -O0.
>
>Leyi Rong (11):
>  net/iavf: flexible Rx descriptor definitions
>  net/iavf: return error if opcode is mismatched
>  net/iavf: flexible Rx descriptor support in normal path
>  net/iavf: flexible Rx descriptor support in AVX path
>  net/iavf: flexible Rx descriptor support in SSE path
>  net/iavf: add flow director enabled switch value
>  net/iavf: support flow mark in normal data path
>  net/iavf: support flow mark in AVX path
>  net/iavf: support flow mark in SSE path
>  net/iavf: add RSS hash parsing in AVX path
>  net/iavf: add RSS hash parsing in SSE path
>
> drivers/net/iavf/iavf.h               |   6 +
> drivers/net/iavf/iavf_ethdev.c        |   8 +
> drivers/net/iavf/iavf_rxtx.c          | 546 ++++++++++++++++++--
> drivers/net/iavf/iavf_rxtx.h          | 250 ++++++++++
> drivers/net/iavf/iavf_rxtx_vec_avx2.c | 691 ++++++++++++++++++++++++++
> drivers/net/iavf/iavf_rxtx_vec_sse.c  | 520 +++++++++++++++++++
> drivers/net/iavf/iavf_vchnl.c         |  59 ++-
> 7 files changed, 2035 insertions(+), 45 deletions(-)
>
>-- 
>2.17.1
>

Applied to dpdk-next-net-intel, Thanks.

^ permalink raw reply	[flat|nested] 80+ messages in thread

end of thread, other threads:[~2020-04-21  3:19 UTC | newest]

Thread overview: 80+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-16  7:45 [dpdk-dev] [PATCH 00/12] framework for advanced iAVF PMD Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 03/12] net/iavf: support to query DDP package info Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-03-26  6:50   ` Wu, Jingjing
2020-03-16  7:45 ` [dpdk-dev] [PATCH 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-03-16  7:45 ` [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value Leyi Rong
2020-03-26  6:57   ` Wu, Jingjing
2020-03-16  7:45 ` [dpdk-dev] [PATCH 08/12] net/iavf: support flow mark in normal data path Leyi Rong
2020-03-16  7:46 ` [dpdk-dev] [PATCH 09/12] net/iavf: support flow mark in AVX path Leyi Rong
2020-03-16  7:46 ` [dpdk-dev] [PATCH 10/12] net/iavf: support flow mark in SSE path Leyi Rong
2020-03-16  7:46 ` [dpdk-dev] [PATCH 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-03-16  7:46 ` [dpdk-dev] [PATCH 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-03-31  8:02 ` [dpdk-dev] [PATCH v2 00/12] framework for advanced iAVF PMD Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 01/12] net/iavf: flexible Rx descriptor definitions Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 02/12] net/iavf: return error if opcode is mismatched Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 03/12] net/iavf: support to query DDP package info Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 04/12] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 05/12] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 06/12] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 07/12] net/iavf: add flow director enabled switch value Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 08/12] net/iavf: support flow mark in normal data path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 09/12] net/iavf: support flow mark in AVX path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 10/12] net/iavf: support flow mark in SSE path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 11/12] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-03-31  8:02   ` [dpdk-dev] [PATCH v2 12/12] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-04-08  6:21 ` [dpdk-dev] [PATCH v3 00/11] framework for advanced iAVF PMD Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 06/11] net/iavf: add flow director enabled switch value Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 07/11] net/iavf: support flow mark in normal data path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 08/11] net/iavf: support flow mark in AVX path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 09/11] net/iavf: support flow mark in SSE path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-04-08  6:22   ` [dpdk-dev] [PATCH v3 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-04-14  6:15 ` [dpdk-dev] [PATCH v4 00/11] framework for advanced iAVF PMD Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 06/11] net/iavf: add flow director enabled switch value Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 07/11] net/iavf: support flow mark in normal data path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 08/11] net/iavf: support flow mark in AVX path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 09/11] net/iavf: support flow mark in SSE path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-04-14  6:15   ` [dpdk-dev] [PATCH v4 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-04-16  8:09 ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-04-17 13:28     ` Ferruh Yigit
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 06/11] net/iavf: add flow director enabled switch value Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 07/11] net/iavf: support flow mark in normal data path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 08/11] net/iavf: support flow mark in AVX path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 09/11] net/iavf: support flow mark in SSE path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-04-16  8:09   ` [dpdk-dev] [PATCH v5 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-04-16 12:21   ` [dpdk-dev] [PATCH v5 00/11] framework for advanced iAVF PMD Zhang, Qi Z
2020-04-17  4:11     ` Ye Xiaolong
2020-04-20  6:16 ` [dpdk-dev] [PATCH v6 " Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 01/11] net/iavf: flexible Rx descriptor definitions Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 02/11] net/iavf: return error if opcode is mismatched Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 03/11] net/iavf: flexible Rx descriptor support in normal path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 04/11] net/iavf: flexible Rx descriptor support in AVX path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 05/11] net/iavf: flexible Rx descriptor support in SSE path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 06/11] net/iavf: add flow director enabled switch value Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 07/11] net/iavf: support flow mark in normal data path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 08/11] net/iavf: support flow mark in AVX path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 09/11] net/iavf: support flow mark in SSE path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 10/11] net/iavf: add RSS hash parsing in AVX path Leyi Rong
2020-04-20  6:16   ` [dpdk-dev] [PATCH v6 11/11] net/iavf: add RSS hash parsing in SSE path Leyi Rong
2020-04-21  3:14   ` [dpdk-dev] [PATCH v6 00/11] framework for advanced iAVF PMD Ye Xiaolong
