All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ixgbe: support checksum flags in sse vector Rx function
@ 2016-07-07 12:19 Olivier Matz
  2016-07-08 20:42 ` Bruce Richardson
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Olivier Matz @ 2016-07-07 12:19 UTC (permalink / raw)
  To: dev, konstantin.ananyev, helin.zhang; +Cc: bruce.richardson, Maxime Leroy

Update desc_to_olflags_v() to set PKT_RX_IP_CKSUM_BAD and
PKT_RX_L4_CKSUM_BAD in the ol_flags of the mbuf.

The Rx vector function can now be used with hw_ip_checksum
enabled.

Tested with:

  cd dpdk.org/
  make config T=x86_64-native-linuxapp-gcc
  make -j32
  mkdir -p /mnt/huge
  mount -t hugetlbfs nodev /mnt/huge
  echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
  modprobe uio_pci_generic
  python tools/dpdk_nic_bind.py -b uio_pci_generic 0000:04:00.0
  ./build/app/testpmd -l 2,4 -- --total-num-mbufs=65536 -i --port-topology=chained --enable-rx-cksum --disable-hw-vlan-filter --disable-hw-vlan-strip
    set fwd rxonly
    set verbose 1
    start

  # send packets to testpmd using scapy
  eh = Ether(src="00:01:02:03:04:05", dst="00:1B:21:AB:8F:10")
  p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP()/Raw("x"*50)
  sendp(p, iface="ixgbe2")
  p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=42)/UDP()/Raw("x"*50)
  sendp(p, iface="ixgbe2")
  p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP(chksum=42)/Raw("x"*50)
  sendp(p, iface="ixgbe2")
  p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=45)/UDP(chksum=42)/Raw("x"*50)
  sendp(p, iface="ixgbe2")

  # result
  port 0/queue 0: received 1 packets
    src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
   - Receive queue=0x0
  port 0/queue 0: received 1 packets
    src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
   - Receive queue=0x0
    PKT_RX_IP_CKSUM_BAD
  port 0/queue 0: received 1 packets
    src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
   - Receive queue=0x0
    PKT_RX_L4_CKSUM_BAD
  port 0/queue 0: received 1 packets
    src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
   - Receive queue=0x0
    PKT_RX_L4_CKSUM_BAD
    PKT_RX_IP_CKSUM_BAD

Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  8 ++---
 drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  6 ++++
 drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c    | 50 +++++++++++++++++++++----------
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
index 62b8201..05f8185 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
@@ -309,12 +309,8 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
 	if (fconf->mode != RTE_FDIR_MODE_NONE)
 		return -1;
 
-	/*
-	 * - no csum error report support
-	 * - no header split support
-	 */
-	if (rxmode->hw_ip_checksum == 1 ||
-	    rxmode->header_split == 1)
+	/* no header split support */
+	if (rxmode->header_split == 1)
 		return -1;
 
 	return 0;
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 64a329e..f96cc85 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -556,5 +556,11 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
 int __attribute__((cold))
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
+	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+	/* no csum error report support */
+	if (rxmode->hw_ip_checksum == 1)
+		return -1;
+
 	return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
 }
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index 1c4fd7c..dc5657e 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -145,7 +145,7 @@ static inline void
 desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
 	struct rte_mbuf **rx_pkts)
 {
-	__m128i ptype0, ptype1, vtag0, vtag1;
+	__m128i ptype0, ptype1, vtag0, vtag1, csum;
 	union {
 		uint16_t e[4];
 		uint64_t dword;
@@ -162,18 +162,26 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
 			PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
 			PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
 
-	/* mask everything except vlan present bit */
-	const __m128i vlan_msk = _mm_set_epi16(
-			0x0000, 0x0000,
-			0x0000, 0x0000,
-			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
-			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
-	/* map vlan present (0x8) to ol_flags */
-	const __m128i vlan_map = _mm_set_epi8(
+	/* mask everything except vlan present and l4/ip csum error */
+	const __m128i vlan_csum_msk = _mm_set_epi16(
+		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
+		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
+		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
+	/* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
+	const __m128i vlan_csum_map = _mm_set_epi8(
 		0, 0, 0, 0,
-		0, 0, 0, vlan_flags,
+		vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+		vlan_flags | PKT_RX_IP_CKSUM_BAD,
+		vlan_flags | PKT_RX_L4_CKSUM_BAD,
+		vlan_flags,
 		0, 0, 0, 0,
-		0, 0, 0, 0);
+		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
+		PKT_RX_IP_CKSUM_BAD,
+		PKT_RX_L4_CKSUM_BAD,
+		0);
 
 	ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
 	ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
@@ -185,8 +193,21 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
 	ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
 
 	vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
-	vtag1 = _mm_and_si128(vtag1, vlan_msk);
-	vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
+	vtag1 = _mm_and_si128(vtag1, vlan_csum_msk);
+
+	/* csum bits are in the most significant, to use shuffle we need to
+	 * shift them. Change mask to 0xc000 to 0x0003.
+	 */
+	csum = _mm_srli_epi16(vtag1, 14);
+
+	/* now or the most significant 64 bits containing the checksum
+	 * flags with the vlan present flags.
+	 */
+	csum = _mm_srli_si128(csum, 8);
+	vtag1 = _mm_or_si128(csum, vtag1);
+
+	/* convert VP, IPE, L4E to ol_flags */
+	vtag1 = _mm_shuffle_epi8(vlan_csum_map, vtag1);
 
 	vtag1 = _mm_or_si128(ptype0, vtag1);
 	vol.dword = _mm_cvtsi128_si64(vtag1);
@@ -210,7 +231,6 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
- * - don't support ol_flags for rss and csum err
  */
 static inline uint16_t
 _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -425,7 +445,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
  * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
- * - don't support ol_flags for rss and csum err
  */
 uint16_t
 ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -438,7 +457,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
  * vPMD receive routine that reassembles scattered packets
  *
  * Notice:
- * - don't support ol_flags for rss and csum err
  * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
  * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
  *   numbers of DD bit
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] ixgbe: support checksum flags in sse vector Rx function
  2016-07-07 12:19 [PATCH] ixgbe: support checksum flags in sse vector Rx function Olivier Matz
@ 2016-07-08 20:42 ` Bruce Richardson
  2016-07-10 17:41   ` Olivier Matz
  2016-07-14  9:24 ` Chandran, Sugesh
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Bruce Richardson @ 2016-07-08 20:42 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev, konstantin.ananyev, helin.zhang, Maxime Leroy

On Thu, Jul 07, 2016 at 02:19:02PM +0200, Olivier Matz wrote:
> Update desc_to_olflags_v() to set PKT_RX_IP_CKSUM_BAD and
> PKT_RX_L4_CKSUM_BAD in the ol_fags of the mbuf.
> 
> The Rx vector function can now be used with hw_ip_checksum
> enabled.
> 
> Tested with:
> 
>   cd dpdk.org/
>   make config T=x86_64-native-linuxapp-gcc
>   make -j32
>   mkdir -p /mnt/huge
>   mount -t hugetlbfs nodev /mnt/huge
>   echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
>   modprobe uio_pci_generic
>   python tools/dpdk_nic_bind.py -b uio_pci_generic 0000:04:00.0
>   ./build/app/testpmd -l 2,4 -- --total-num-mbufs=65536 -i --port-topology=chained --enable-rx-cksum --disable-hw-vlan-filter --disable-hw-vlan-strip
>     set fwd rxonly
>     set verbose 1
>     start
> 
>   # send packets to testpmd using scapy
>   eh = Ether(src="00:01:02:03:04:05", dst="00:1B:21:AB:8F:10")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=42)/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=45)/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
> 
>   # result
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_IP_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>     PKT_RX_IP_CKSUM_BAD
> 
> Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---

Given this looks a significant change to a complicated piece of code, which is
also very performance sensitive, I believe this change needs to be deferred to
16.11 release, as there is not enough time to properly review and test it for
16.07.

/Bruce

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ixgbe: support checksum flags in sse vector Rx function
  2016-07-08 20:42 ` Bruce Richardson
@ 2016-07-10 17:41   ` Olivier Matz
  0 siblings, 0 replies; 8+ messages in thread
From: Olivier Matz @ 2016-07-10 17:41 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, konstantin.ananyev, helin.zhang, Maxime Leroy

Hi Bruce,

On 07/08/2016 10:42 PM, Bruce Richardson wrote:
> On Thu, Jul 07, 2016 at 02:19:02PM +0200, Olivier Matz wrote:
>> Update desc_to_olflags_v() to set PKT_RX_IP_CKSUM_BAD and
>> PKT_RX_L4_CKSUM_BAD in the ol_fags of the mbuf.
>>
{...}
> 
> Given this looks a significant change to a complicated piece of code, which is
> also very performance sensitive, I believe this change needs to be deferred to
> 16.11 release, as there is not enough time to properly review and test it for
> 16.07.


Sure, that was the plan, I forgot to mention it :)

Thanks,
Olivier

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ixgbe: support checksum flags in sse vector Rx function
  2016-07-07 12:19 [PATCH] ixgbe: support checksum flags in sse vector Rx function Olivier Matz
  2016-07-08 20:42 ` Bruce Richardson
@ 2016-07-14  9:24 ` Chandran, Sugesh
  2016-07-19 15:51   ` Olivier Matz
  2016-09-14 12:39 ` Ferruh Yigit
  2016-10-06 14:00 ` Remy Horton
  3 siblings, 1 reply; 8+ messages in thread
From: Chandran, Sugesh @ 2016-07-14  9:24 UTC (permalink / raw)
  To: 'Olivier Matz', dev, Ananyev, Konstantin, Zhang, Helin
  Cc: Richardson, Bruce, Maxime Leroy

Hi Olivier,
Thank you for working on this.
We tried to enable checksum offload in OVS-DPDK and couldn't proceed due to the performance impact.
I assume this patch will fix that issue by enabling checksum offloading with vectorization ON at Rx side.

A few questions,
1) Is there any plan to extend this to other NIC drivers, other than ixgbe? What are the implications of it?
2) Is it possible to enable it on the Tx side as well?

I haven't looked into the patch in much detail and have very little context on it. So please forgive me if any of these queries make no sense.

Regards
_Sugesh


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> Sent: Thursday, July 7, 2016 1:19 PM
> To: dev@dpdk.org; Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> Zhang, Helin <helin.zhang@intel.com>
> Cc: Richardson, Bruce <bruce.richardson@intel.com>; Maxime Leroy
> <maxime.leroy@6wind.com>
> Subject: [dpdk-dev] [PATCH] ixgbe: support checksum flags in sse vector Rx
> function
> 
> Update desc_to_olflags_v() to set PKT_RX_IP_CKSUM_BAD and
> PKT_RX_L4_CKSUM_BAD in the ol_fags of the mbuf.
> 
> The Rx vector function can now be used with hw_ip_checksum enabled.
> 
> Tested with:
> 
>   cd dpdk.org/
>   make config T=x86_64-native-linuxapp-gcc
>   make -j32
>   mkdir -p /mnt/huge
>   mount -t hugetlbfs nodev /mnt/huge
>   echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-
> 2048kB/nr_hugepages
>   modprobe uio_pci_generic
>   python tools/dpdk_nic_bind.py -b uio_pci_generic 0000:04:00.0
>   ./build/app/testpmd -l 2,4 -- --total-num-mbufs=65536 -i --port-
> topology=chained --enable-rx-cksum --disable-hw-vlan-filter --disable-hw-
> vlan-strip
>     set fwd rxonly
>     set verbose 1
>     start
> 
>   # send packets to testpmd using scapy
>   eh = Ether(src="00:01:02:03:04:05", dst="00:1B:21:AB:8F:10")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=42)/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2",
> chksum=45)/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
> 
>   # result
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 -
> nb_segs=1Unknown packet type
>    - Receive queue=0x0
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 -
> nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_IP_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 -
> nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 -
> nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>     PKT_RX_IP_CKSUM_BAD
> 
> Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  8 ++---
>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  6 ++++
>  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c    | 50 +++++++++++++++++++++--
> --------
>  3 files changed, 42 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
> b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
> index 62b8201..05f8185 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h
> @@ -309,12 +309,8 @@
> ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
>  	if (fconf->mode != RTE_FDIR_MODE_NONE)
>  		return -1;
> 
> -	/*
> -	 * - no csum error report support
> -	 * - no header split support
> -	 */
> -	if (rxmode->hw_ip_checksum == 1 ||
> -	    rxmode->header_split == 1)
> +	/* no header split support */
> +	if (rxmode->header_split == 1)
>  		return -1;
> 
>  	return 0;
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> index 64a329e..f96cc85 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
> @@ -556,5 +556,11 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
> int __attribute__((cold))  ixgbe_rx_vec_dev_conf_condition_check(struct
> rte_eth_dev *dev)  {
> +	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
> +
> +	/* no csum error report support */
> +	if (rxmode->hw_ip_checksum == 1)
> +		return -1;
> +
>  	return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
>  }
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
> b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
> index 1c4fd7c..dc5657e 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
> @@ -145,7 +145,7 @@ static inline void
>  desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
>  	struct rte_mbuf **rx_pkts)
>  {
> -	__m128i ptype0, ptype1, vtag0, vtag1;
> +	__m128i ptype0, ptype1, vtag0, vtag1, csum;
>  	union {
>  		uint16_t e[4];
>  		uint64_t dword;
> @@ -162,18 +162,26 @@ desc_to_olflags_v(__m128i descs[4], uint8_t
> vlan_flags,
>  			PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
>  			PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
> PKT_RX_RSS_HASH, 0);
> 
> -	/* mask everything except vlan present bit */
> -	const __m128i vlan_msk = _mm_set_epi16(
> -			0x0000, 0x0000,
> -			0x0000, 0x0000,
> -			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
> -			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
> -	/* map vlan present (0x8) to ol_flags */
> -	const __m128i vlan_map = _mm_set_epi8(
> +	/* mask everything except vlan present and l4/ip csum error */
> +	const __m128i vlan_csum_msk = _mm_set_epi16(
> +		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >>
> 16,
> +		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >>
> 16,
> +		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >>
> 16,
> +		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >>
> 16,
> +		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
> +		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);
> +	/* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
> +	const __m128i vlan_csum_map = _mm_set_epi8(
>  		0, 0, 0, 0,
> -		0, 0, 0, vlan_flags,
> +		vlan_flags | PKT_RX_IP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD,
> +		vlan_flags | PKT_RX_IP_CKSUM_BAD,
> +		vlan_flags | PKT_RX_L4_CKSUM_BAD,
> +		vlan_flags,
>  		0, 0, 0, 0,
> -		0, 0, 0, 0);
> +		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
> +		PKT_RX_IP_CKSUM_BAD,
> +		PKT_RX_L4_CKSUM_BAD,
> +		0);
> 
>  	ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
>  	ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); @@ -185,8
> +193,21 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags,
>  	ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
> 
>  	vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
> -	vtag1 = _mm_and_si128(vtag1, vlan_msk);
> -	vtag1 = _mm_shuffle_epi8(vlan_map, vtag1);
> +	vtag1 = _mm_and_si128(vtag1, vlan_csum_msk);
> +
> +	/* csum bits are in the most significant, to use shuffle we need to
> +	 * shift them. Change mask to 0xc000 to 0x0003.
> +	 */
> +	csum = _mm_srli_epi16(vtag1, 14);
> +
> +	/* now or the most significant 64 bits containing the checksum
> +	 * flags with the vlan present flags.
> +	 */
> +	csum = _mm_srli_si128(csum, 8);
> +	vtag1 = _mm_or_si128(csum, vtag1);
> +
> +	/* convert VP, IPE, L4E to ol_flags */
> +	vtag1 = _mm_shuffle_epi8(vlan_csum_map, vtag1);
> 
>  	vtag1 = _mm_or_si128(ptype0, vtag1);
>  	vol.dword = _mm_cvtsi128_si64(vtag1);
> @@ -210,7 +231,6 @@ desc_to_olflags_v(__m128i descs[4], uint8_t
> vlan_flags,
>   * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
>   *   numbers of DD bit
>   * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> - * - don't support ol_flags for rss and csum err
>   */
>  static inline uint16_t
>  _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf
> **rx_pkts, @@ -425,7 +445,6 @@ _recv_raw_pkts_vec(struct
> ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>   * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
>   *   numbers of DD bit
>   * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
> - * - don't support ol_flags for rss and csum err
>   */
>  uint16_t
>  ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -
> 438,7 +457,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> **rx_pkts,
>   * vPMD receive routine that reassembles scattered packets
>   *
>   * Notice:
> - * - don't support ol_flags for rss and csum err
>   * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
>   * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan
> RTE_IXGBE_MAX_RX_BURST
>   *   numbers of DD bit
> --
> 2.8.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ixgbe: support checksum flags in sse vector Rx function
  2016-07-14  9:24 ` Chandran, Sugesh
@ 2016-07-19 15:51   ` Olivier Matz
  0 siblings, 0 replies; 8+ messages in thread
From: Olivier Matz @ 2016-07-19 15:51 UTC (permalink / raw)
  To: Chandran, Sugesh, dev, Ananyev, Konstantin, Zhang, Helin
  Cc: Richardson, Bruce, Maxime Leroy

Hi Sugesh,

On 07/14/2016 11:24 AM, Chandran, Sugesh wrote:
> Hi Olivier,
> Thank you for working on this.
> We tried to enable checksum offload in OVS-DPDK and couldn't proceed due to the performance impact.
> I assume this patch will fix that issue by enabling checksum offloading with vectorization ON at Rx side.
> 
> Few questions,
> 1) Is there any plan to extend this to other NIC drivers, other than ixgbe? What are the implications of it?

On my side no plan for other drivers, but this is more a question for
pmd maintainers.


> 2) Is it possible to enable it on the Tx side as well?

Yes, vector tx is enabled or not depending on the feature you request at
init (offload, multisegments, ...). See ixgbe_set_tx_function() for
details. My patch does not change this behavior.

> 
> I haven't looked into patch very detail and very little context on it. So please forgive me if any of these queries make no sense.
> 
> Regards
> _Sugesh

Regards,
Olivier

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ixgbe: support checksum flags in sse vector Rx function
  2016-07-07 12:19 [PATCH] ixgbe: support checksum flags in sse vector Rx function Olivier Matz
  2016-07-08 20:42 ` Bruce Richardson
  2016-07-14  9:24 ` Chandran, Sugesh
@ 2016-09-14 12:39 ` Ferruh Yigit
  2016-10-06 14:00 ` Remy Horton
  3 siblings, 0 replies; 8+ messages in thread
From: Ferruh Yigit @ 2016-09-14 12:39 UTC (permalink / raw)
  To: Olivier Matz, dev, Ananyev, Konstantin, Zhang, Helin
  Cc: Richardson, Bruce, Maxime Leroy

On 7/7/2016 1:19 PM, Olivier Matz wrote:
> Update desc_to_olflags_v() to set PKT_RX_IP_CKSUM_BAD and
> PKT_RX_L4_CKSUM_BAD in the ol_fags of the mbuf.
> 
> The Rx vector function can now be used with hw_ip_checksum
> enabled.
> 
> Tested with:
> 
>   cd dpdk.org/
>   make config T=x86_64-native-linuxapp-gcc
>   make -j32
>   mkdir -p /mnt/huge
>   mount -t hugetlbfs nodev /mnt/huge
>   echo 256 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
>   modprobe uio_pci_generic
>   python tools/dpdk_nic_bind.py -b uio_pci_generic 0000:04:00.0
>   ./build/app/testpmd -l 2,4 -- --total-num-mbufs=65536 -i --port-topology=chained --enable-rx-cksum --disable-hw-vlan-filter --disable-hw-vlan-strip
>     set fwd rxonly
>     set verbose 1
>     start
> 
>   # send packets to testpmd using scapy
>   eh = Ether(src="00:01:02:03:04:05", dst="00:1B:21:AB:8F:10")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=42)/UDP()/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2")/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
>   p = Ether()/IP(src="1.1.1.1", dst="1.1.1.2", chksum=45)/UDP(chksum=42)/Raw("x"*50)
>   sendp(p, iface="ixgbe2")
> 
>   # result
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_IP_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>   port 0/queue 0: received 1 packets
>     src=00:01:02:03:04:05 - dst=00:1B:21:AB:8F:10 - type=0x0800 - length=92 - nb_segs=1Unknown packet type
>    - Receive queue=0x0
>     PKT_RX_L4_CKSUM_BAD
>     PKT_RX_IP_CKSUM_BAD
> 
> Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---

ixgbe maintainers, can you please review the patch?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: ixgbe: support checksum flags in sse vector Rx function
  2016-07-07 12:19 [PATCH] ixgbe: support checksum flags in sse vector Rx function Olivier Matz
                   ` (2 preceding siblings ...)
  2016-09-14 12:39 ` Ferruh Yigit
@ 2016-10-06 14:00 ` Remy Horton
  2016-10-13 23:27   ` Thomas Monjalon
  3 siblings, 1 reply; 8+ messages in thread
From: Remy Horton @ 2016-10-06 14:00 UTC (permalink / raw)
  To: Olivier Matz, dev, konstantin.ananyev, helin.zhang
  Cc: bruce.richardson, Maxime Leroy


On 07/07/2016 13:19, Olivier Matz wrote:
[..]
> Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  8 ++---
>  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  6 ++++
>  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c    | 50 +++++++++++++++++++++----------
>  3 files changed, 42 insertions(+), 22 deletions(-)

Acked-by: Remy Horton <remy.horton@intel.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: ixgbe: support checksum flags in sse vector Rx function
  2016-10-06 14:00 ` Remy Horton
@ 2016-10-13 23:27   ` Thomas Monjalon
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Monjalon @ 2016-10-13 23:27 UTC (permalink / raw)
  To: Olivier Matz
  Cc: dev, Remy Horton, konstantin.ananyev, helin.zhang,
	bruce.richardson, Maxime Leroy

2016-10-06 15:00, Remy Horton:
> On 07/07/2016 13:19, Olivier Matz wrote:
> [..]
> > Signed-off-by: Maxime Leroy <maxime.leroy@6wind.com>
> > Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > ---
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_common.h |  8 ++---
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c   |  6 ++++
> >  drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c    | 50 +++++++++++++++++++++----------
> >  3 files changed, 42 insertions(+), 22 deletions(-)
> 
> Acked-by: Remy Horton <remy.horton@intel.com>

Applied directly in mainline on Ferruh's advice

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-10-13 23:27 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-07 12:19 [PATCH] ixgbe: support checksum flags in sse vector Rx function Olivier Matz
2016-07-08 20:42 ` Bruce Richardson
2016-07-10 17:41   ` Olivier Matz
2016-07-14  9:24 ` Chandran, Sugesh
2016-07-19 15:51   ` Olivier Matz
2016-09-14 12:39 ` Ferruh Yigit
2016-10-06 14:00 ` Remy Horton
2016-10-13 23:27   ` Thomas Monjalon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.