All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols
@ 2013-03-18 16:51 Dmitry Kravkov
  2013-03-18 16:51 ` [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic Dmitry Kravkov
  2013-03-18 17:11 ` [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols David Miller
  0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-03-18 16:51 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein

The patch utilizes FW offload capabilities for
encapsulation protocols.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |   29 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |  196 ++++++++++++++++++++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |    7 +
 3 files changed, 204 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 9e8d195..a4729c7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -612,9 +612,10 @@ struct bnx2x_fastpath {
  * START_BD		- describes packed
  * START_BD(splitted)	- includes unpaged data segment for GSO
  * PARSING_BD		- for TSO and CSUM data
+ * PARSING_BD2		- for encapsulation data
  * Frag BDs		- decribes pages for frags
  */
-#define BDS_PER_TX_PKT		3
+#define BDS_PER_TX_PKT		4
 #define MAX_BDS_PER_TX_PKT	(MAX_SKB_FRAGS + BDS_PER_TX_PKT)
 /* max BDs per tx packet including next pages */
 #define MAX_DESC_PER_TX_PKT	(MAX_BDS_PER_TX_PKT + \
@@ -731,16 +732,22 @@ struct bnx2x_fastpath {
 
 #define pbd_tcp_flags(tcp_hdr)	(ntohl(tcp_flag_word(tcp_hdr))>>16 & 0xff)
 
-#define XMIT_PLAIN			0
-#define XMIT_CSUM_V4			0x1
-#define XMIT_CSUM_V6			0x2
-#define XMIT_CSUM_TCP			0x4
-#define XMIT_GSO_V4			0x8
-#define XMIT_GSO_V6			0x10
-
-#define XMIT_CSUM			(XMIT_CSUM_V4 | XMIT_CSUM_V6)
-#define XMIT_GSO			(XMIT_GSO_V4 | XMIT_GSO_V6)
-
+#define XMIT_PLAIN		0
+#define XMIT_CSUM_V4		(1 << 0)
+#define XMIT_CSUM_V6		(1 << 1)
+#define XMIT_CSUM_TCP		(1 << 2)
+#define XMIT_GSO_V4		(1 << 3)
+#define XMIT_GSO_V6		(1 << 4)
+#define XMIT_CSUM_ENC_V4	(1 << 5)
+#define XMIT_CSUM_ENC_V6	(1 << 6)
+#define XMIT_GSO_ENC_V4		(1 << 7)
+#define XMIT_GSO_ENC_V6		(1 << 8)
+
+#define XMIT_CSUM_ENC		(XMIT_CSUM_ENC_V4 | XMIT_CSUM_ENC_V6)
+#define XMIT_GSO_ENC		(XMIT_GSO_ENC_V4 | XMIT_GSO_ENC_V6)
+
+#define XMIT_CSUM		(XMIT_CSUM_V4 | XMIT_CSUM_V6 | XMIT_CSUM_ENC)
+#define XMIT_GSO		(XMIT_GSO_V4 | XMIT_GSO_V6 | XMIT_GSO_ENC)
 
 /* stuff added to make the code fit 80Col */
 #define CQE_TYPE(cqe_fp_flags)	 ((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 9f7a379..8091de7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3148,27 +3148,44 @@ static __le16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
 static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
 {
 	u32 rc;
+	__u8 prot = 0;
+	__be16 protocol;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		rc = XMIT_PLAIN;
+		return XMIT_PLAIN;
 
-	else {
-		if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
-			rc = XMIT_CSUM_V6;
-			if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
-				rc |= XMIT_CSUM_TCP;
+	protocol = vlan_get_protocol(skb);
+	if (protocol == htons(ETH_P_IPV6)) {
+		rc = XMIT_CSUM_V6;
+		prot = ipv6_hdr(skb)->nexthdr;
+	} else {
+		rc = XMIT_CSUM_V4;
+		prot = ip_hdr(skb)->protocol;
+	}
 
+	if (!CHIP_IS_E1x(bp) && skb->encapsulation) {
+		if (inner_ip_hdr(skb)->version == 6) {
+			rc |= XMIT_CSUM_ENC_V6;
+			if (inner_ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+				rc |= XMIT_CSUM_TCP;
 		} else {
-			rc = XMIT_CSUM_V4;
-			if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+			rc |= XMIT_CSUM_ENC_V4;
+			if (inner_ip_hdr(skb)->protocol == IPPROTO_TCP)
 				rc |= XMIT_CSUM_TCP;
 		}
 	}
+	if (prot == IPPROTO_TCP)
+		rc |= XMIT_CSUM_TCP;
 
-	if (skb_is_gso_v6(skb))
-		rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
-	else if (skb_is_gso(skb))
-		rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
+	if (skb_is_gso_v6(skb)) {
+		rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6);
+		if (rc & XMIT_CSUM_ENC)
+			rc |= XMIT_GSO_ENC_V6;
+	} else if (skb_is_gso(skb)) {
+		rc |= (XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP);
+		if (rc & XMIT_CSUM_ENC)
+			rc |= XMIT_GSO_ENC_V4;
+	}
 
 	return rc;
 }
@@ -3256,11 +3273,20 @@ exit_lbl:
 static void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
 				 u32 xmit_type)
 {
+	struct ipv6hdr *ipv6;
+
 	*parsing_data |= (skb_shinfo(skb)->gso_size <<
 			      ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
 			      ETH_TX_PARSE_BD_E2_LSO_MSS;
-	if ((xmit_type & XMIT_GSO_V6) &&
-	    (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
+
+	if (xmit_type & XMIT_GSO_ENC_V6)
+		ipv6 = inner_ipv6_hdr(skb);
+	else if (xmit_type & XMIT_GSO_V6)
+		ipv6 = ipv6_hdr(skb);
+	else
+		ipv6 = NULL;
+
+	if (ipv6 && ipv6->nexthdr == NEXTHDR_IPV6)
 		*parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
 }
 
@@ -3297,6 +3323,40 @@ static void bnx2x_set_pbd_gso(struct sk_buff *skb,
 }
 
 /**
+ * bnx2x_set_pbd_csum_enc - update PBD with checksum and return header length
+ *
+ * @bp:			driver handle
+ * @skb:		packet skb
+ * @parsing_data:	data to be updated
+ * @xmit_type:		xmit flags
+ *
+ * 57712/578xx related, when skb has encapsulation
+ */
+static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb,
+				 u32 *parsing_data, u32 xmit_type)
+{
+	*parsing_data |=
+		((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) <<
+		ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) &
+		ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W;
+
+	if (xmit_type & XMIT_CSUM_TCP) {
+		*parsing_data |= ((inner_tcp_hdrlen(skb) / 4) <<
+			ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
+			ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
+
+		return skb_inner_transport_header(skb) +
+			inner_tcp_hdrlen(skb) - skb->data;
+	}
+
+	/* We support checksum offload for TCP and UDP only.
+	 * No need to pass the UDP header length - it's a constant.
+	 */
+	return skb_inner_transport_header(skb) +
+		sizeof(struct udphdr) - skb->data;
+}
+
+/**
  * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
  *
  * @bp:			driver handle
@@ -3327,13 +3387,14 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
 	return skb_transport_header(skb) + sizeof(struct udphdr) - skb->data;
 }
 
+/* set FW indication according to inner or outer protocols if tunneled */
 static void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
 			       struct eth_tx_start_bd *tx_start_bd,
 			       u32 xmit_type)
 {
 	tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
 
-	if (xmit_type & XMIT_CSUM_V6)
+	if (xmit_type & (XMIT_CSUM_ENC_V6 | XMIT_CSUM_V6))
 		tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IPV6;
 
 	if (!(xmit_type & XMIT_CSUM_TCP))
@@ -3396,6 +3457,72 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
 	return hlen;
 }
 
+static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb,
+				      struct eth_tx_parse_bd_e2 *pbd_e2,
+				      struct eth_tx_parse_2nd_bd *pbd2,
+				      u16 *global_data,
+				      u32 xmit_type)
+{
+	u16 inner_hlen_w = 0;
+	u8 outerip_off, outerip_len = 0;
+
+	/* IP len */
+	inner_hlen_w = (skb_inner_transport_header(skb) -
+			skb_inner_network_header(skb)) >> 1;
+
+	/* transport len */
+	if (xmit_type & XMIT_CSUM_TCP)
+		inner_hlen_w += inner_tcp_hdrlen(skb) >> 1;
+	else
+		inner_hlen_w += sizeof(struct udphdr) >> 1;
+
+	pbd2->fw_ip_hdr_to_payload_w = inner_hlen_w;
+
+	if (xmit_type & XMIT_CSUM_ENC_V4) {
+		struct iphdr *iph = inner_ip_hdr(skb);
+
+		pbd2->fw_ip_csum_wo_len_flags_frag =
+			bswab16(csum_fold((~iph->check) -
+					  iph->tot_len - iph->frag_off));
+	} else {
+		pbd2->fw_ip_hdr_to_payload_w =
+			inner_hlen_w - ((sizeof(struct ipv6hdr)) >> 1);
+	}
+
+	pbd2->tcp_send_seq = bswab32(inner_tcp_hdr(skb)->seq);
+
+	pbd2->tcp_flags = pbd_tcp_flags(inner_tcp_hdr(skb));
+
+	if (xmit_type & XMIT_GSO_V4) {
+		pbd2->hw_ip_id = bswab16(ip_hdr(skb)->id);
+
+		pbd_e2->data.tunnel_data.pseudo_csum =
+			bswab16(~csum_tcpudp_magic(
+					inner_ip_hdr(skb)->saddr,
+					inner_ip_hdr(skb)->daddr,
+					0, IPPROTO_TCP, 0));
+
+		outerip_len = ip_hdr(skb)->ihl << 1;
+	} else {
+		pbd_e2->data.tunnel_data.pseudo_csum =
+			bswab16(~csum_ipv6_magic(
+					&inner_ipv6_hdr(skb)->saddr,
+					&inner_ipv6_hdr(skb)->daddr,
+					0, IPPROTO_TCP, 0));
+	}
+
+	outerip_off = (skb_network_header(skb) - skb->data) >> 1;
+
+	*global_data |=
+		outerip_off |
+		(!!(xmit_type & XMIT_CSUM_V6) <<
+			ETH_TX_PARSE_2ND_BD_IP_HDR_TYPE_OUTER_SHIFT) |
+		(outerip_len <<
+			ETH_TX_PARSE_2ND_BD_IP_HDR_LEN_OUTER_W_SHIFT) |
+		((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
+			ETH_TX_PARSE_2ND_BD_LLC_SNAP_EN_SHIFT);
+}
+
 /* called with netif_tx_lock
  * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
  * netif_wake_queue()
@@ -3411,6 +3538,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
 	struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
 	struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
+	struct eth_tx_parse_2nd_bd *pbd2 = NULL;
 	u32 pbd_e2_parsing_data = 0;
 	u16 pkt_prod, bd_prod;
 	int nbd, txq_index;
@@ -3567,12 +3695,46 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!CHIP_IS_E1x(bp)) {
 		pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
 		memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
-		/* Set PBD in checksum offload case */
-		if (xmit_type & XMIT_CSUM)
+
+		if (xmit_type & XMIT_CSUM_ENC) {
+			u16 global_data = 0;
+
+			/* Set PBD in enc checksum offload case */
+			hlen = bnx2x_set_pbd_csum_enc(bp, skb,
+						      &pbd_e2_parsing_data,
+						      xmit_type);
+
+			/* turn on 2nd parsing and get a BD */
+			bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
+
+			pbd2 = &txdata->tx_desc_ring[bd_prod].parse_2nd_bd;
+
+			memset(pbd2, 0, sizeof(*pbd2));
+
+			pbd_e2->data.tunnel_data.ip_hdr_start_inner_w =
+				(skb_inner_network_header(skb) -
+				 skb->data) >> 1;
+
+			if (xmit_type & XMIT_GSO_ENC)
+				bnx2x_update_pbds_gso_enc(skb, pbd_e2, pbd2,
+							  &global_data,
+							  xmit_type);
+
+			pbd2->global_data = cpu_to_le16(global_data);
+
+			/* add addition parse BD indication to start BD */
+			SET_FLAG(tx_start_bd->general_data,
+				 ETH_TX_START_BD_PARSE_NBDS, 1);
+			/* set encapsulation flag in start BD */
+			SET_FLAG(tx_start_bd->general_data,
+				 ETH_TX_START_BD_TUNNEL_EXIST, 1);
+			nbd++;
+		} else if (xmit_type & XMIT_CSUM) {
 			/* Set PBD in checksum offload case w/o encapsulation */
 			hlen = bnx2x_set_pbd_csum_e2(bp, skb,
 						     &pbd_e2_parsing_data,
 						     xmit_type);
+		}
 
 		/* Add the macs to the parsing BD this is a vf */
 		if (IS_VF(bp)) {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 04d123f..4902d1e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -11965,6 +11965,13 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
 		NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
 		NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO |
 		NETIF_F_RXHASH | NETIF_F_HW_VLAN_TX;
+	if (!CHIP_IS_E1x(bp)) {
+		dev->hw_features |= NETIF_F_GSO_GRE;
+		dev->hw_enc_features =
+			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
+			NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
+			NETIF_F_GSO_GRE;
+	}
 
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
-- 
1.7.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-18 16:51 [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols Dmitry Kravkov
@ 2013-03-18 16:51 ` Dmitry Kravkov
  2013-03-18 17:11   ` David Miller
  2013-03-19  0:07   ` Eric Dumazet
  2013-03-18 17:11 ` [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols David Miller
  1 sibling, 2 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-03-18 16:51 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein

The patch drives FW to perform RSS for GRE traffic,
based on inner headers.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 8f96372..f9098d8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -973,6 +973,9 @@ static inline int bnx2x_func_start(struct bnx2x *bp)
 	else /* CHIP_IS_E1X */
 		start_params->network_cos_mode = FW_WRR;
 
+	start_params->gre_tunnel_mode = IPGRE_TUNNEL;
+	start_params->gre_tunnel_rss = GRE_INNER_HEADERS_RSS;
+
 	return bnx2x_func_state_change(bp, &func_params);
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 66ab259..5bdc1d6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -5679,17 +5679,18 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp,
 	memset(rdata, 0, sizeof(*rdata));
 
 	/* Fill the ramrod data with provided parameters */
-	rdata->function_mode    = (u8)start_params->mf_mode;
-	rdata->sd_vlan_tag      = cpu_to_le16(start_params->sd_vlan_tag);
-	rdata->path_id          = BP_PATH(bp);
-	rdata->network_cos_mode = start_params->network_cos_mode;
-
-	/*
-	 *  No need for an explicit memory barrier here as long we would
-	 *  need to ensure the ordering of writing to the SPQ element
-	 *  and updating of the SPQ producer which involves a memory
-	 *  read and we will have to put a full memory barrier there
-	 *  (inside bnx2x_sp_post()).
+	rdata->function_mode	= (u8)start_params->mf_mode;
+	rdata->sd_vlan_tag	= cpu_to_le16(start_params->sd_vlan_tag);
+	rdata->path_id		= BP_PATH(bp);
+	rdata->network_cos_mode	= start_params->network_cos_mode;
+	rdata->gre_tunnel_mode	= start_params->gre_tunnel_mode;
+	rdata->gre_tunnel_rss	= start_params->gre_tunnel_rss;
+
+	/* No need for an explicit memory barrier here as long we would
+	 * need to ensure the ordering of writing to the SPQ element
+	 * and updating of the SPQ producer which involves a memory
+	 * read and we will have to put a full memory barrier there
+	 * (inside bnx2x_sp_post()).
 	 */
 
 	return bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_FUNCTION_START, 0,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 064dba2..35479da 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -1123,6 +1123,15 @@ struct bnx2x_func_start_params {
 
 	/* Function cos mode */
 	u8 network_cos_mode;
+
+	/* NVGRE classification enablement */
+	u8 nvgre_clss_en;
+
+	/* NO_GRE_TUNNEL/NVGRE_TUNNEL/L2GRE_TUNNEL/IPGRE_TUNNEL */
+	u8 gre_tunnel_mode;
+
+	/* GRE_OUTER_HEADERS_RSS/GRE_INNER_HEADERS_RSS/NVGRE_KEY_ENTROPY_RSS */
+	u8 gre_tunnel_rss;
 };
 
 struct bnx2x_func_switch_update_params {
-- 
1.7.7.2

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols
  2013-03-18 16:51 [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols Dmitry Kravkov
  2013-03-18 16:51 ` [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic Dmitry Kravkov
@ 2013-03-18 17:11 ` David Miller
  1 sibling, 0 replies; 19+ messages in thread
From: David Miller @ 2013-03-18 17:11 UTC (permalink / raw)
  To: dmitry; +Cc: netdev, eilong

From: "Dmitry Kravkov" <dmitry@broadcom.com>
Date: Mon, 18 Mar 2013 18:51:03 +0200

> The patch utilizes FW offload capabilities for
> encapsulation protocols.
> 
> Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
> Signed-off-by: Eilon Greenstein <eilong@broadcom.com>

Applied.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-18 16:51 ` [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic Dmitry Kravkov
@ 2013-03-18 17:11   ` David Miller
  2013-03-19  0:07   ` Eric Dumazet
  1 sibling, 0 replies; 19+ messages in thread
From: David Miller @ 2013-03-18 17:11 UTC (permalink / raw)
  To: dmitry; +Cc: netdev, eilong

From: "Dmitry Kravkov" <dmitry@broadcom.com>
Date: Mon, 18 Mar 2013 18:51:04 +0200

> The patch drives FW to perform RSS for GRE traffic,
> based on inner headers.
> 
> Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
> Signed-off-by: Eilon Greenstein <eilong@broadcom.com>

Applied.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-18 16:51 ` [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic Dmitry Kravkov
  2013-03-18 17:11   ` David Miller
@ 2013-03-19  0:07   ` Eric Dumazet
  2013-03-19  6:30     ` Dmitry Kravkov
  1 sibling, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2013-03-19  0:07 UTC (permalink / raw)
  To: Dmitry Kravkov
  Cc: davem, netdev, Eilon Greenstein, Tom Herbert, Maciej Żenczykowski

On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
> The patch drives FW to perform RSS for GRE traffic,
> based on inner headers.
> 
> Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
> Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
>  3 files changed, 24 insertions(+), 11 deletions(-)

This works very well.

Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()

(this was a patch from Tom Herbert, commit
693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
when rx'ing over tunnel )

Meaning we hit a single cpu for the GRO stuff in ip_gre.

I have to think about it.


Another question is : 

Can bnx2x check the tcp checksum if GRE encapsulated ?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19  0:07   ` Eric Dumazet
@ 2013-03-19  6:30     ` Dmitry Kravkov
  2013-03-19  9:18       ` Maciej Żenczykowski
  2013-08-17 17:53       ` Jerry Chu
  0 siblings, 2 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-03-19  6:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, netdev, Eilon Greenstein, Tom Herbert, Maciej Żenczykowski

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Eric Dumazet
> Sent: Tuesday, March 19, 2013 2:08 AM
> To: Dmitry Kravkov
> Cc: davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert; Maciej Żenczykowski
> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
> 
> On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
> > The patch drives FW to perform RSS for GRE traffic,
> > based on inner headers.
> >
> > Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
> > Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
> > ---
> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
> >  3 files changed, 24 insertions(+), 11 deletions(-)
> 
> This works very well.
> 
> Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()
> 
> (this was a patch from Tom Herbert, commit
> 693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
> when rx'ing over tunnel )
> 
> Meaning we hit a single cpu for the GRO stuff in ip_gre.
> 
> I have to think about it.
> 
> 
> Another question is :
> 
> Can bnx2x check the tcp checksum if GRE encapsulated ?
> 
Current HW can't provide this. Probably, it's possible to separate CSUM from GRO/TPA then stack will have to handle CSUM validation for huge packets. Is it worth?


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19  6:30     ` Dmitry Kravkov
@ 2013-03-19  9:18       ` Maciej Żenczykowski
  2013-03-19 12:21         ` Dmitry Kravkov
  2013-03-19 12:25         ` Eric Dumazet
  2013-08-17 17:53       ` Jerry Chu
  1 sibling, 2 replies; 19+ messages in thread
From: Maciej Żenczykowski @ 2013-03-19  9:18 UTC (permalink / raw)
  To: Dmitry Kravkov; +Cc: Eric Dumazet, davem, netdev, Eilon Greenstein, Tom Herbert

Can the HW calculate and return a 1s complement sum of the entire
packet (or a large portion there-of)?
Fixing that up to be only of the outer IPv4, inner IPv4 and inner TCP
relevant portions should still be simpler (well faster) than
calculating the TCP checksum.
I'm pretty sure that some relationship between 1s complement sum of
all bytes, outer IPv4 checksum, inner IPv4 checksum and TCP checksum
could be pulled out of a hat with some deeper thought.  (similarly for
IPv4/GRE/IPv6/TCP and other combinations)

What portions of the packet can the HW/FW [partially] checksum - and
return the value to the driver for further processing?
Can it return 1s complement sum of data portion of outer IPv4 (ie. in
IPv4/GRE/IPv4/TCP return a 1s complement sum of GRE/IPv4/TCP bytes)

Maciej Żenczykowski, Kernel Networking Developer @ Google

On Mon, Mar 18, 2013 at 11:30 PM, Dmitry Kravkov <dmitry@broadcom.com> wrote:
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Eric Dumazet
>> Sent: Tuesday, March 19, 2013 2:08 AM
>> To: Dmitry Kravkov
>> Cc: davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert; Maciej Żenczykowski
>> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
>>
>> On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
>> > The patch drives FW to perform RSS for GRE traffic,
>> > based on inner headers.
>> >
>> > Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
>> > Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
>> > ---
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
>> >  3 files changed, 24 insertions(+), 11 deletions(-)
>>
>> This works very well.
>>
>> Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()
>>
>> (this was a patch from Tom Herbert, commit
>> 693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
>> when rx'ing over tunnel )
>>
>> Meaning we hit a single cpu for the GRO stuff in ip_gre.
>>
>> I have to think about it.
>>
>>
>> Another question is :
>>
>> Can bnx2x check the tcp checksum if GRE encapsulated ?
>>
> Current HW can't provide this. Probably, it's possible to separate CSUM from GRO/TPA then stack will have to handle CSUM validation for huge packets. Is it worth?
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19  9:18       ` Maciej Żenczykowski
@ 2013-03-19 12:21         ` Dmitry Kravkov
  2013-03-19 12:25         ` Eric Dumazet
  1 sibling, 0 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-03-19 12:21 UTC (permalink / raw)
  To: Maciej Żenczykowski
  Cc: Eric Dumazet, davem, netdev, Eilon Greenstein, Tom Herbert


> -----Original Message-----
> From: Maciej Żenczykowski [mailto:maze@google.com]
> Sent: Tuesday, March 19, 2013 11:18 AM
> To: Dmitry Kravkov
> Cc: Eric Dumazet; davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert
> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
> 
> Can the HW calculate and return a 1s complement sum of the entire
> packet (or a large portion there-of)?
No, it can't :(

> Fixing that up to be only of the outer IPv4, inner IPv4 and inner TCP
> relevant portions should still be simpler (well faster) than
> calculating the TCP checksum.
> I'm pretty sure that some relationship between 1s complement sum of
> all bytes, outer IPv4 checksum, inner IPv4 checksum and TCP checksum
> could be pulled out of a hat with some deeper thought.  (similarly for
> IPv4/GRE/IPv6/TCP and other combinations)
> 
> What portions of the packet can the HW/FW [partially] checksum - and
> return the value to the driver for further processing?
> Can it return 1s complement sum of data portion of outer IPv4 (ie. in
> IPv4/GRE/IPv4/TCP return a 1s complement sum of GRE/IPv4/TCP bytes)
> 
> Maciej Żenczykowski, Kernel Networking Developer @ Google
> 
> On Mon, Mar 18, 2013 at 11:30 PM, Dmitry Kravkov <dmitry@broadcom.com> wrote:
> >> -----Original Message-----
> >> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Eric Dumazet
> >> Sent: Tuesday, March 19, 2013 2:08 AM
> >> To: Dmitry Kravkov
> >> Cc: davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert; Maciej Żenczykowski
> >> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
> >>
> >> On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
> >> > The patch drives FW to perform RSS for GRE traffic,
> >> > based on inner headers.
> >> >
> >> > Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
> >> > Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
> >> > ---
> >> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
> >> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
> >> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
> >> >  3 files changed, 24 insertions(+), 11 deletions(-)
> >>
> >> This works very well.
> >>
> >> Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()
> >>
> >> (this was a patch from Tom Herbert, commit
> >> 693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
> >> when rx'ing over tunnel )
> >>
> >> Meaning we hit a single cpu for the GRO stuff in ip_gre.
> >>
> >> I have to think about it.
> >>
> >>
> >> Another question is :
> >>
> >> Can bnx2x check the tcp checksum if GRE encapsulated ?
> >>
> > Current HW can't provide this. Probably, it's possible to separate CSUM from GRO/TPA then stack will have to handle CSUM
> validation for huge packets. Is it worth?
> >


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19  9:18       ` Maciej Żenczykowski
  2013-03-19 12:21         ` Dmitry Kravkov
@ 2013-03-19 12:25         ` Eric Dumazet
  2013-03-19 13:43           ` Dmitry Kravkov
  1 sibling, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2013-03-19 12:25 UTC (permalink / raw)
  To: Maciej Żenczykowski
  Cc: Dmitry Kravkov, davem, netdev, Eilon Greenstein, Tom Herbert

Please Maciej do not top post on lkml or netdev mailing lists.

On Tue, 2013-03-19 at 02:18 -0700, Maciej Żenczykowski wrote:
> Can the HW calculate and return a 1s complement sum of the entire
> packet (or a large portion there-of)?
> Fixing that up to be only of the outer IPv4, inner IPv4 and inner TCP
> relevant portions should still be simpler (well faster) than
> calculating the TCP checksum.
> I'm pretty sure that some relationship between 1s complement sum of
> all bytes, outer IPv4 checksum, inner IPv4 checksum and TCP checksum
> could be pulled out of a hat with some deeper thought.  (similarly for
> IPv4/GRE/IPv6/TCP and other combinations)
> 
> What portions of the packet can the HW/FW [partially] checksum - and
> return the value to the driver for further processing?
> Can it return 1s complement sum of data portion of outer IPv4 (ie. in
> IPv4/GRE/IPv4/TCP return a 1s complement sum of GRE/IPv4/TCP bytes)
> 

I assume Dmitry was speaking of this possibility, and our stack should
handle this just fine.

NIC providing these kind of checksums set :

skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum;

before feeding the packet to the stack.

When we pull some header, we have to call skb_postpull_rcsum()
to adjust the skb->csum so that final check can be done.

About 20 drivers currently provide these kind of checksumming.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19 12:25         ` Eric Dumazet
@ 2013-03-19 13:43           ` Dmitry Kravkov
  0 siblings, 0 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-03-19 13:43 UTC (permalink / raw)
  To: Eric Dumazet, Maciej Żenczykowski
  Cc: davem, netdev, Eilon Greenstein, Tom Herbert

> -----Original Message-----
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> Sent: Tuesday, March 19, 2013 2:26 PM
> To: Maciej Żenczykowski
> Cc: Dmitry Kravkov; davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert
> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
> 
> Please Maciej do not top post on lkml or netdev mailing lists.
> 
> On Tue, 2013-03-19 at 02:18 -0700, Maciej Żenczykowski wrote:
> > Can the HW calculate and return a 1s complement sum of the entire
> > packet (or a large portion there-of)?
> > Fixing that up to be only of the outer IPv4, inner IPv4 and inner TCP
> > relevant portions should still be simpler (well faster) than
> > calculating the TCP checksum.
> > I'm pretty sure that some relationship between 1s complement sum of
> > all bytes, outer IPv4 checksum, inner IPv4 checksum and TCP checksum
> > could be pulled out of a hat with some deeper thought.  (similarly for
> > IPv4/GRE/IPv6/TCP and other combinations)
> >
> > What portions of the packet can the HW/FW [partially] checksum - and
> > return the value to the driver for further processing?
> > Can it return 1s complement sum of data portion of outer IPv4 (ie. in
> > IPv4/GRE/IPv4/TCP return a 1s complement sum of GRE/IPv4/TCP bytes)
> >
> 
> I assume Dmitry was speaking of this possibility, and our stack should
> handle this just fine.

In case of tunneling bnx2x HW can not provide csum of any portion of the packet.
Flag for XSUM_NO_VALIDATION on cqe will be set for all gre packets.
As a result driver will leave:
skb->ip_summed = CHECKSUM_NONE;

> 
> NIC providing these kind of checksums set :
> 
> skb->ip_summed = CHECKSUM_COMPLETE;
> skb->csum = csum;
> 
> before feeding the packet to the stack.
> 
> When we pull some header, we have to call skb_postpull_rcsum()
> to adjust the skb->csum so that final check can be done.
> 
> About 20 drivers currently provide these kind of checksumming.
> 


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-03-19  6:30     ` Dmitry Kravkov
  2013-03-19  9:18       ` Maciej Żenczykowski
@ 2013-08-17 17:53       ` Jerry Chu
  2013-08-17 18:52         ` Dmitry Kravkov
  1 sibling, 1 reply; 19+ messages in thread
From: Jerry Chu @ 2013-08-17 17:53 UTC (permalink / raw)
  To: Dmitry Kravkov
  Cc: Eric Dumazet, davem, netdev, Eilon Greenstein, Tom Herbert,
	Maciej Żenczykowski

Dmitry,

On Mon, Mar 18, 2013 at 11:30 PM, Dmitry Kravkov <dmitry@broadcom.com> wrote:
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Eric Dumazet
>> Sent: Tuesday, March 19, 2013 2:08 AM
>> To: Dmitry Kravkov
>> Cc: davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert; Maciej Żenczykowski
>> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
>>
>> On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
>> > The patch drives FW to perform RSS for GRE traffic,
>> > based on inner headers.
>> >
>> > Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
>> > Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
>> > ---
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
>> >  3 files changed, 24 insertions(+), 11 deletions(-)
>>
>> This works very well.
>>
>> Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()
>>
>> (this was a patch from Tom Herbert, commit
>> 693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
>> when rx'ing over tunnel )
>>
>> Meaning we hit a single cpu for the GRO stuff in ip_gre.
>>
>> I have to think about it.
>>
>>
>> Another question is :
>>
>> Can bnx2x check the tcp checksum if GRE encapsulated ?
>>
> Current HW can't provide this. Probably, it's possible to separate CSUM from GRO/TPA then stack will have to handle CSUM validation for huge packets. Is it worth?

Could you elaborate on what you meant above? (I'm looking for any form
of h/w assist for
csum validation for GRO/TPA pkts since csum computation can be expensive and  as
you said below CHECKSUM_COMPLETE is out of the question.)

Thanks,

Jerry

>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-17 17:53       ` Jerry Chu
@ 2013-08-17 18:52         ` Dmitry Kravkov
  2013-08-17 19:01           ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Dmitry Kravkov @ 2013-08-17 18:52 UTC (permalink / raw)
  To: Jerry Chu
  Cc: Dmitry Kravkov, Eric Dumazet, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski

On Sat, Aug 17, 2013 at 8:53 PM, Jerry Chu <hkchu@google.com> wrote:
> Dmitry,
>
> On Mon, Mar 18, 2013 at 11:30 PM, Dmitry Kravkov <dmitry@broadcom.com> wrote:
>>> -----Original Message-----
>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Eric Dumazet
>>> Sent: Tuesday, March 19, 2013 2:08 AM
>>> To: Dmitry Kravkov
>>> Cc: davem@davemloft.net; netdev@vger.kernel.org; Eilon Greenstein; Tom Herbert; Maciej Żenczykowski
>>> Subject: Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
>>>
>>> On Mon, 2013-03-18 at 18:51 +0200, Dmitry Kravkov wrote:
>>> > The patch drives FW to perform RSS for GRE traffic,
>>> > based on inner headers.
>>> >
>>> > Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
>>> > Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
>>> > ---
>>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +++
>>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c  |   23 ++++++++++++-----------
>>> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h  |    9 +++++++++
>>> >  3 files changed, 24 insertions(+), 11 deletions(-)
>>>
>>> This works very well.
>>>
>>> Problem is we skb_set_queue_mapping(skb, 0); in __skb_tunnel_rx()
>>>
>>> (this was a patch from Tom Herbert, commit
>>> 693019e90ca45d881109d32c0c6d29adf03f6447 (net: reset skb queue mapping
>>> when rx'ing over tunnel )
>>>
>>> Meaning we hit a single cpu for the GRO stuff in ip_gre.
>>>
>>> I have to think about it.
>>>
>>>
>>> Another question is :
>>>
>>> Can bnx2x check the tcp checksum if GRE encapsulated ?
>>>
>> Current HW can't provide this. Probably, it's possible to separate CSUM from GRO/TPA then stack will have to handle CSUM validation for huge packets. Is it worth?
>
> Could you elaborate on what you meant above? (I'm looking for any form
> of h/w assist for
> csum validation for GRO/TPA pkts since csum computation can be expensive and  as
> you said below CHECKSUM_COMPLETE is out of the question.)
>

Current bnx2x HW is not able to perform CSUM validation for
encapsulated packets, so in any case host needs to do that.
Today GRO/TPA feature depends on CSUM, but theoretically (i did not
investigate it) and probably HW can provide aggregated packets w/o
csum validation - this will save headers processing for host.


> Thanks,
>
> Jerry
>
>>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-17 18:52         ` Dmitry Kravkov
@ 2013-08-17 19:01           ` Eric Dumazet
  2013-08-17 19:04             ` Dmitry Kravkov
  2013-08-18 11:55             ` Jerry Chu
  0 siblings, 2 replies; 19+ messages in thread
From: Eric Dumazet @ 2013-08-17 19:01 UTC (permalink / raw)
  To: Dmitry Kravkov
  Cc: Jerry Chu, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski

On Sat, 2013-08-17 at 21:52 +0300, Dmitry Kravkov wrote:

> 
> Current bnx2x HW is not able to perform CSUM validation for
> encapsulated packets, so in any case host needs to do that.
> Today GRO/TPA feature depends on CSUM, but theoretically (i did not
> investigate it) and probably HW can provide aggregated packets w/o
> csum validation - this will save headers processing for host.

I am not sure I understand this.

Aggregation cannot be done if csums are not validated.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-17 19:01           ` Eric Dumazet
@ 2013-08-17 19:04             ` Dmitry Kravkov
  2013-08-18 11:55             ` Jerry Chu
  1 sibling, 0 replies; 19+ messages in thread
From: Dmitry Kravkov @ 2013-08-17 19:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jerry Chu, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski

On Sat, Aug 17, 2013 at 10:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sat, 2013-08-17 at 21:52 +0300, Dmitry Kravkov wrote:
>
>>
>> Current bnx2x HW is not able to perform CSUM validation for
>> encapsulated packets, so in any case host needs to do that.
>> Today GRO/TPA feature depends on CSUM, but theoretically (i did not
>> investigate it) and probably HW can provide aggregated packets w/o
>> csum validation - this will save headers processing for host.
>
> I am not sure I understand this.
>
> Aggregation cannot be done if csums are not validated.
>
>

Thanks. Eric. So we can't do with current bnx2x HW.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-17 19:01           ` Eric Dumazet
  2013-08-17 19:04             ` Dmitry Kravkov
@ 2013-08-18 11:55             ` Jerry Chu
  2013-08-18 15:37               ` Eric Dumazet
  1 sibling, 1 reply; 19+ messages in thread
From: Jerry Chu @ 2013-08-18 11:55 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Dmitry Kravkov, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski

On Sat, Aug 17, 2013 at 12:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sat, 2013-08-17 at 21:52 +0300, Dmitry Kravkov wrote:
>
>>
>> Current bnx2x HW is not able to perform CSUM validation for
>> encapsulated packets, so in any case host needs to do that.
>> Today GRO/TPA feature depends on CSUM, but theoretically (i did not
>> investigate it) and probably HW can provide aggregated packets w/o
>> csum validation - this will save headers processing for host.
>
> I am not sure I understand this.
>
> Aggregation cannot be done if csums are not validated.

Unless all the csums from aggregated pkts are also aggregated into one
(so that the host computes s/w csum on the large pkt and validates against
the aggregated csum...) There may be some performance benefit for doing
this but it may arguably weaken already weak 1's complement csum.

Jerry

>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-18 11:55             ` Jerry Chu
@ 2013-08-18 15:37               ` Eric Dumazet
  2013-08-18 21:11                 ` Jerry Chu
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2013-08-18 15:37 UTC (permalink / raw)
  To: Jerry Chu
  Cc: Dmitry Kravkov, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski

On Sun, 2013-08-18 at 04:55 -0700, Jerry Chu wrote:
> On Sat, Aug 17, 2013 at 12:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Sat, 2013-08-17 at 21:52 +0300, Dmitry Kravkov wrote:
> >
> >>
> >> Current bnx2x HW is not able to perform CSUM validation for
> >> encapsulated packets, so in any case host needs to do that.
> >> Today GRO/TPA feature depends on CSUM, but theoretically (i did not
> >> investigate it) and probably HW can provide aggregated packets w/o
> >> csum validation - this will save headers processing for host.
> >
> > I am not sure I understand this.
> >
> > Aggregation cannot be done if csums are not validated.
> 
> Unless all the csums from aggregated pkts are also aggregated into one
> (so that the host computes s/w csum on the large pkt and validates against
> the aggregated csum...) There may be some performance benefit for doing
> this but it may arguably weaken already weak 1's complement csum.

To aggregate two packets, you first have to make sure the checksums of
each packet are ok. If the hardware does not validate checksum, then it
cannot aggregate packets.

Sure, CHECKSUM_COMPLETE support would be nice, so that linux can perform
aggregation without having to bring into cpu cache the whole frame.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-18 15:37               ` Eric Dumazet
@ 2013-08-18 21:11                 ` Jerry Chu
  2013-08-18 23:44                   ` Eric Dumazet
  0 siblings, 1 reply; 19+ messages in thread
From: Jerry Chu @ 2013-08-18 21:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Dmitry Kravkov, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski, Uday Naik, Michael Dalton

+mwdalton, uday

On Sun, Aug 18, 2013 at 8:37 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sun, 2013-08-18 at 04:55 -0700, Jerry Chu wrote:
>> On Sat, Aug 17, 2013 at 12:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> > On Sat, 2013-08-17 at 21:52 +0300, Dmitry Kravkov wrote:
>> >
>> >>
>> >> Current bnx2x HW is not able to perform CSUM validation for
>> >> encapsulated packets, so in any case host needs to do that.
>> >> Today GRO/TPA feature depends on CSUM, but theoretically (i did not
>> >> investigate it) and probably HW can provide aggregated packets w/o
>> >> csum validation - this will save headers processing for host.
>> >
>> > I am not sure I understand this.
>> >
>> > Aggregation cannot be done if csums are not validated.
>>
>> Unless all the csums from aggregated pkts are also aggregated into one
>> (so that the host computes s/w csum on the large pkt and validates against
>> the aggregated csum...) There may be some performance benefit for doing
>> this but it may arguably weaken already weak 1's complement csum.
>
> To aggregate two packets, you first have to make sure the checksums of
> each packet are ok. If the hardware does not validate checksum, then it
> cannot aggregate packets.

It can if the csum validation is deferred. But this requires the stack code to
aggregate csum from individual pkt header, which can get ugly as one needs
to remove the header part from the individual csum first. It may also weaken
the already weak TCP csum as i mentioned previously. Also one corrupted
pkt will cause the whole GRO pkt to be thrown away. But I won't worry about
it for the data center environment where pkt corruption is rare.

>
> Sure, CHECKSUM_COMPLETE support would be nice, so that linux can perform
> aggregation without having to bring into cpu cache the whole frame.

I wish CHECKSUM_COMPLETE can be supported by the h/w - we are currently
running an older kernel w/o RSS for GRE support so all the GRE traffic between
two IPs hits the same NIC queue. With my GRO-GRE patch (to be upstreamed
later) I'm 20% below line rate (7.1Gbps) on 10GbE multi-TCP stream test because
RSS will saturate a single CPU that is handling softirq. The majority
of CPU cost
was from bcopy (~20%) followed by csum computation (4%). If I comment out the
csum validation in the GRO code then I hit line rate (9.1Gbps) right away on the
same test.

In order for me to saturate the line rate but without h/w CHECKSUM_COMPLETE
support, one other alternative is to back port the RSS for GRE support, which
seems a daunting task (if not please tell me). If I defer the csum
validation after
GRO then RPS will spread out traffic and the csum cost so we are no longer
bottlenecked on one CPU (and hopefully hit the line rate).

Jerry

>
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-18 21:11                 ` Jerry Chu
@ 2013-08-18 23:44                   ` Eric Dumazet
  2013-08-19 14:57                     ` Jerry Chu
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2013-08-18 23:44 UTC (permalink / raw)
  To: Jerry Chu
  Cc: Dmitry Kravkov, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski, Uday Naik, Michael Dalton

On Sun, 2013-08-18 at 14:11 -0700, Jerry Chu wrote:

> It can if the csum validation is deferred. But this requires the stack code to
> aggregate csum from individual pkt header, which can get ugly as one needs
> to remove the header part from the individual csum first.

You can _not_ aggregate packets _before_ validating their checksum, or
else you could end up with a correct final checksum while two or more
segments had buggy checksums. Checksum are already quite weak, please
do not make them even weaker ;)

GRO should not throw out any segments, it is not its role.

If a router receives a bunch of packets, it should forward them
regardless of the (TCP) checksum being good or not.

The final receiver has full responsibility for ultimately validate the
checksums and update various SNMP counters.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic
  2013-08-18 23:44                   ` Eric Dumazet
@ 2013-08-19 14:57                     ` Jerry Chu
  0 siblings, 0 replies; 19+ messages in thread
From: Jerry Chu @ 2013-08-19 14:57 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Dmitry Kravkov, Dmitry Kravkov, davem, netdev, Eilon Greenstein,
	Tom Herbert, Maciej Żenczykowski, Uday Naik, Michael Dalton

On Sun, Aug 18, 2013 at 4:44 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Sun, 2013-08-18 at 14:11 -0700, Jerry Chu wrote:
>
>> It can if the csum validation is deferred. But this requires the stack code to
>> aggregate csum from individual pkt header, which can get ugly as one needs
>> to remove the header part from the individual csum first.
>
> You can _not_ aggregate packets _before_ validating their checksum, or
> else you could end up with a correct final checksum while two or more
> segments had buggy checksums. Checksum are already quite weak, please
> do not make them even weaker ;)

Yes I know how weak the 16bit 1's complement inet csum (see my previous email).
I've had the pleasure to debug a number of bugs in the past where 16bit words
were flipped by the buggy h/w hence went undetected by the weak TCP csum.

But I also think it's a tradeoff call. For WAN it's probably a bad
idea but for data
center traffic, pkt corruption is usually rare. If the performance
gain is large enough,
perhaps it justifies this hack (?)

>
> GRO should not throw out any segments, it is not its role.

I did not suggest GRO to throw away any pkt. GRO would just fix the csum
field in the header of the aggregated pkt and passes it on.

>
> If a router receives a bunch of packets, it should forward them
> regardless of the (TCP) checksum being good or not.

It won't affect the forwarding path (i think).

>
> The final receiver has full responsibility for ultimately validate the
> checksums and update various SNMP counters.

When the GRO pkt with the csum field fixed (aggregated) get to
the receiving TCP endpoint, it will be csum validated by the TCP stack
because skb->ip_summed == CHECKSUM_NONE, and the pkt will be
tossed if the csum check fails like non-GRO pkts.

To be clear, this hack is not something I'd be proud of and I doubt the
performance gain will be enough to justify it. This idea grew more out of
desperation for not having csum offload for encap'ed pkts, which I think
is a P0 bug!

Jerry

>
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2013-08-19 14:57 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-18 16:51 [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols Dmitry Kravkov
2013-03-18 16:51 ` [PATCH net-next 2/2] bnx2x: add RSS capability for GRE traffic Dmitry Kravkov
2013-03-18 17:11   ` David Miller
2013-03-19  0:07   ` Eric Dumazet
2013-03-19  6:30     ` Dmitry Kravkov
2013-03-19  9:18       ` Maciej Żenczykowski
2013-03-19 12:21         ` Dmitry Kravkov
2013-03-19 12:25         ` Eric Dumazet
2013-03-19 13:43           ` Dmitry Kravkov
2013-08-17 17:53       ` Jerry Chu
2013-08-17 18:52         ` Dmitry Kravkov
2013-08-17 19:01           ` Eric Dumazet
2013-08-17 19:04             ` Dmitry Kravkov
2013-08-18 11:55             ` Jerry Chu
2013-08-18 15:37               ` Eric Dumazet
2013-08-18 21:11                 ` Jerry Chu
2013-08-18 23:44                   ` Eric Dumazet
2013-08-19 14:57                     ` Jerry Chu
2013-03-18 17:11 ` [PATCH net-next 1/2] bnx2x: add CSUM and TSO support for encapsulation protocols David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.