All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matan Azrad <matan@mellanox.com>
To: Ferruh Yigit <ferruh.yigit@intel.com>,
	Shahaf Shuler <shahafs@mellanox.com>,
	Yongseok Koh <yskoh@mellanox.com>,
	Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Cc: dev@dpdk.org, Dekel Peled <dekelp@mellanox.com>
Subject: [dpdk-dev] [PATCH v2 25/28] net/mlx5: handle LRO packets in Rx queue
Date: Mon, 22 Jul 2019 14:52:22 +0000	[thread overview]
Message-ID: <1563807145-16577-26-git-send-email-matan@mellanox.com> (raw)
In-Reply-To: <1563807145-16577-1-git-send-email-matan@mellanox.com>

When LRO offload is configured in Rx queue, the HW may coalesce TCP
packets from same TCP connection into single packet.

In this case the SW should fix the relevant packet headers because the
HW doesn't update them according to the new created packet
characteristics.

Add update header code to the mprq Rx burst function to support LRO
feature.

Signed-off-by: Matan Azrad <matan@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5_prm.h  |  15 ++++++
 drivers/net/mlx5/mlx5_rxtx.c | 113 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 3f73a28..32bc7a6 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -155,6 +155,21 @@
 /* Tunnel packet bit in the CQE. */
 #define MLX5_CQE_RX_TUNNEL_PACKET (1u << 0)
 
+/* Mask for LRO push flag in the CQE lro_tcppsh_abort_dupack field. */
+#define MLX5_CQE_LRO_PUSH_MASK 0x40
+
+/* Mask for L4 type in the CQE hdr_type_etc field. */
+#define MLX5_CQE_L4_TYPE_MASK 0x70
+
+/* The bit index of L4 type in CQE hdr_type_etc field. */
+#define MLX5_CQE_L4_TYPE_SHIFT 0x4
+
+/* L4 type to indicate TCP packet without acknowledgment. */
+#define MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK 0x3
+
+/* L4 type to indicate TCP packet with acknowledgment. */
+#define MLX5_L4_HDR_TYPE_TCP_WITH_ACL 0x4
+
 /* Inner L3 checksum offload (Tunneled packets only). */
 #define MLX5_ETH_WQE_L3_INNER_CSUM (1u << 4)
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 241e01b..c7487ac 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1374,6 +1374,101 @@ enum mlx5_txcmp_code {
 	return i;
 }
 
+/**
+ * Update LRO packet TCP header.
+ * The HW LRO feature doesn't update the TCP header after coalescing the
+ * TCP segments but supplies information in CQE to fill it by SW.
+ *
+ * @param tcp
+ *   Pointer to the TCP header.
+ * @param cqe
+ *   Pointer to the completion entry..
+ * @param phcsum
+ *   The L3 pseudo-header checksum.
+ */
+static inline void
+mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp,
+			volatile struct mlx5_cqe *restrict cqe,
+			uint32_t phcsum)
+{
+	uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
+			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
+	/*
+	 * The HW calculates only the TCP payload checksum, need to complete
+	 * the TCP header checksum and the L3 pseudo-header checksum.
+	 */
+	uint32_t csum = phcsum + cqe->csum;
+
+	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
+	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
+		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
+		tcp->recv_ack = cqe->lro_ack_seq_num;
+		tcp->rx_win = cqe->lro_tcp_win;
+	}
+	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
+		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
+	tcp->cksum = 0;
+	csum += rte_raw_cksum(tcp, (tcp->data_off & 0xF) * 4);
+	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
+	csum = (~csum) & 0xffff;
+	if (csum == 0)
+		csum = 0xffff;
+	tcp->cksum = csum;
+}
+
+/**
+ * Update LRO packet headers.
+ * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
+ * TCP segments but supply information in CQE to fill it by SW.
+ *
+ * @param padd
+ *   The packet address.
+ * @param cqe
+ *   Pointer to the completion entry..
+ * @param len
+ *   The packet length.
+ */
+static inline void
+mlx5_lro_update_hdr(uint8_t *restrict padd,
+		    volatile struct mlx5_cqe *restrict cqe,
+		    uint32_t len)
+{
+	union {
+		struct rte_ether_hdr *eth;
+		struct rte_vlan_hdr *vlan;
+		struct rte_ipv4_hdr *ipv4;
+		struct rte_ipv6_hdr *ipv6;
+		struct rte_tcp_hdr *tcp;
+		uint8_t *hdr;
+	} h = {
+			.hdr = padd,
+	};
+	uint16_t proto = h.eth->ether_type;
+	uint32_t phcsum;
+
+	h.eth++;
+	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
+	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
+		proto = h.vlan->eth_proto;
+		h.vlan++;
+	}
+	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
+		h.ipv4->time_to_live = cqe->lro_min_ttl;
+		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
+		h.ipv4->hdr_checksum = 0;
+		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
+		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
+		h.ipv4++;
+	} else {
+		h.ipv6->hop_limits = cqe->lro_min_ttl;
+		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
+						       sizeof(*h.ipv6));
+		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
+		h.ipv6++;
+	}
+	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum);
+}
+
 void
 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
 {
@@ -1458,6 +1553,7 @@ enum mlx5_txcmp_code {
 		uint32_t byte_cnt;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		uint32_t rss_hash_res = 0;
+		uint8_t lro_num_seg;
 
 		if (consumed_strd == strd_n) {
 			/* Replace WQE only if the buffer is still in use. */
@@ -1503,6 +1599,7 @@ enum mlx5_txcmp_code {
 		}
 		assert(strd_idx < strd_n);
 		assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
+		lro_num_seg = cqe->lro_num_seg;
 		/*
 		 * Currently configured to receive a packet per a stride. But if
 		 * MTU is adjusted through kernel interface, device could
@@ -1510,7 +1607,7 @@ enum mlx5_txcmp_code {
 		 * case, the packet should be dropped because it is bigger than
 		 * the max_rx_pkt_len.
 		 */
-		if (unlikely(strd_cnt > 1)) {
+		if (unlikely(!lro_num_seg && strd_cnt > 1)) {
 			++rxq->stats.idropped;
 			continue;
 		}
@@ -1547,19 +1644,20 @@ enum mlx5_txcmp_code {
 			rte_iova_t buf_iova;
 			struct rte_mbuf_ext_shared_info *shinfo;
 			uint16_t buf_len = strd_cnt * strd_sz;
+			void *buf_addr;
 
 			/* Increment the refcnt of the whole chunk. */
 			rte_atomic16_add_return(&buf->refcnt, 1);
 			assert((uint16_t)rte_atomic16_read(&buf->refcnt) <=
 			       strd_n + 1);
-			addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
+			buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
 			/*
 			 * MLX5 device doesn't use iova but it is necessary in a
 			 * case where the Rx packet is transmitted via a
 			 * different PMD.
 			 */
 			buf_iova = rte_mempool_virt2iova(buf) +
-				   RTE_PTR_DIFF(addr, buf);
+				   RTE_PTR_DIFF(buf_addr, buf);
 			shinfo = &buf->shinfos[strd_idx];
 			rte_mbuf_ext_refcnt_set(shinfo, 1);
 			/*
@@ -1568,8 +1666,8 @@ enum mlx5_txcmp_code {
 			 * will be added below by calling rxq_cq_to_mbuf().
 			 * Other fields will be overwritten.
 			 */
-			rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len,
-						  shinfo);
+			rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
+						  buf_len, shinfo);
 			rte_pktmbuf_reset_headroom(pkt);
 			assert(pkt->ol_flags == EXT_ATTACHED_MBUF);
 			/*
@@ -1583,6 +1681,11 @@ enum mlx5_txcmp_code {
 			}
 		}
 		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
+		if (lro_num_seg > 1) {
+			mlx5_lro_update_hdr(addr, cqe, len);
+			pkt->ol_flags |= PKT_RX_LRO;
+			pkt->tso_segsz = strd_sz;
+		}
 		PKT_LEN(pkt) = len;
 		DATA_LEN(pkt) = len;
 		PORT(pkt) = rxq->port_id;
-- 
1.8.3.1


  parent reply	other threads:[~2019-07-22 14:57 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-22  9:12 [dpdk-dev] [PATCH 00/28] net/mlx5: support LRO Matan Azrad
2019-07-22  9:12 ` [dpdk-dev] [PATCH 01/28] net/mlx5: remove redundant item from union Matan Azrad
2019-07-22  9:17   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 02/28] net/mlx5: add LRO APIs and initial settings Matan Azrad
2019-07-22  9:25   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 03/28] net/mlx5: support LRO caps query using devx API Matan Azrad
2019-07-22  9:17   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 04/28] net/mlx5: glue func for queue query using new API Matan Azrad
2019-07-22  9:18   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 05/28] net/mlx5: glue function for action " Matan Azrad
2019-07-22  9:18   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 06/28] net/mlx5: check conditions to enable LRO Matan Azrad
2019-07-22  9:18   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 07/28] net/mlx5: support Tx interface query using new API Matan Azrad
2019-07-22  9:19   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 08/28] net/mlx5: update Tx queue create for LRO Matan Azrad
2019-07-22  9:18   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 09/28] net/mlx5: create advanced RxQ object using new API Matan Azrad
2019-07-22  9:17   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 10/28] net/mlx5: modify " Matan Azrad
2019-07-22  9:20   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 11/28] net/mlx5: create advanced Rx " Matan Azrad
2019-07-22  9:20   ` Slava Ovsiienko
2019-07-22  9:12 ` [dpdk-dev] [PATCH 12/28] net/mlx5: create advanced RxQ table " Matan Azrad
2019-07-22  9:21   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 13/28] net/mlx5: allocate door-bells " Matan Azrad
2019-07-22  9:20   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 14/28] net/mlx5: rename RxQ verbs to general RxQ object Matan Azrad
2019-07-22  9:22   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 15/28] net/mlx5: rename verbs indirection table to obj Matan Azrad
2019-07-22  9:22   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 16/28] net/mlx5: rename hash RxQ verbs to general Matan Azrad
2019-07-22  9:22   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 17/28] net/mlx5: update queue state modify function Matan Azrad
2019-07-22  9:22   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 18/28] net/mlx5: store protection domain number on create Matan Azrad
2019-07-22  9:21   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 19/28] net/mlx5: func to create Rx verbs completion queue Matan Azrad
2019-07-22  9:23   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 20/28] net/mlx5: function to create Rx verbs work queue Matan Azrad
2019-07-22  9:21   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 21/28] net/mlx5: create advanced RxQ using new API Matan Azrad
2019-07-22  9:21   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 22/28] net/mlx5: support LRO with single RxQ object Matan Azrad
2019-07-22  9:22   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 23/28] net/mlx5: replace the external mbuf shared memory Matan Azrad
2019-07-22  9:21   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 24/28] net/mlx5: update LRO fields in completion entry Matan Azrad
2019-07-22  9:23   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 25/28] net/mlx5: handle LRO packets in Rx queue Matan Azrad
2019-07-22  9:26   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 26/28] net/mlx5: zero the LRO mbuf headroom Matan Azrad
2019-07-22  9:23   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 27/28] net/mlx5: adjust the maximum LRO message size Matan Azrad
2019-07-22  9:23   ` Slava Ovsiienko
2019-07-22  9:13 ` [dpdk-dev] [PATCH 28/28] doc: update MLX5 doc and release notes with LRO Matan Azrad
2019-07-22  9:23   ` Slava Ovsiienko
2019-07-22 10:42 ` [dpdk-dev] [PATCH 00/28] net/mlx5: support LRO Raslan Darawsheh
2019-07-22 12:48 ` Ferruh Yigit
2019-07-22 13:32   ` Matan Azrad
2019-07-22 14:51 ` [dpdk-dev] [PATCH v2 " Matan Azrad
2019-07-22 14:51   ` [dpdk-dev] [PATCH v2 01/28] net/mlx5: remove redundant item from union Matan Azrad
2019-07-23 10:53     ` Ferruh Yigit
2019-07-23 12:10       ` Matan Azrad
2019-07-22 14:51   ` [dpdk-dev] [PATCH v2 02/28] net/mlx5: add LRO APIs and initial settings Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 03/28] net/mlx5: support LRO caps query using devx API Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 04/28] net/mlx5: glue func for queue query using new API Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 05/28] net/mlx5: glue function for action " Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 06/28] net/mlx5: check conditions to enable LRO Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 07/28] net/mlx5: support Tx interface query using new API Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 08/28] net/mlx5: update Tx queue create for LRO Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 09/28] net/mlx5: create advanced RxQ object using new API Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 10/28] net/mlx5: modify " Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 11/28] net/mlx5: create advanced Rx " Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 12/28] net/mlx5: create advanced RxQ table " Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 13/28] net/mlx5: allocate door-bells " Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 14/28] net/mlx5: rename RxQ verbs to general RxQ object Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 15/28] net/mlx5: rename verbs indirection table to obj Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 16/28] net/mlx5: rename hash RxQ verbs to general Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 17/28] net/mlx5: update queue state modify function Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 18/28] net/mlx5: store protection domain number on create Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 19/28] net/mlx5: func to create Rx verbs completion queue Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 20/28] net/mlx5: function to create Rx verbs work queue Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 21/28] net/mlx5: create advanced RxQ using new API Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 22/28] net/mlx5: support LRO with single RxQ object Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 23/28] net/mlx5: replace the external mbuf shared memory Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 24/28] net/mlx5: update LRO fields in completion entry Matan Azrad
2019-07-22 14:52   ` Matan Azrad [this message]
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 26/28] net/mlx5: zero the LRO mbuf headroom Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 27/28] net/mlx5: adjust the maximum LRO message size Matan Azrad
2019-07-22 14:52   ` [dpdk-dev] [PATCH v2 28/28] doc: update MLX5 doc and release notes with LRO Matan Azrad
2019-07-23  6:48   ` [dpdk-dev] [PATCH v2 00/28] net/mlx5: support LRO Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1563807145-16577-26-git-send-email-matan@mellanox.com \
    --to=matan@mellanox.com \
    --cc=dekelp@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=shahafs@mellanox.com \
    --cc=viacheslavo@mellanox.com \
    --cc=yskoh@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.