linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA.
@ 2017-04-07  9:57 Sivakumar Krishnasamy
  2017-04-08 18:45 ` David Miller
  2017-05-19  9:30 ` [PATCH net-next v2] " Sivakumar Krishnasamy
  0 siblings, 2 replies; 5+ messages in thread
From: Sivakumar Krishnasamy @ 2017-04-07  9:57 UTC (permalink / raw)
  To: netdev
  Cc: tlfalcon, benh, paulus, mpe, linuxppc-dev, linux-kernel, brking,
	seroyer, Sivakumar Krishnasamy

Enable largesend and checksum offload for ibmveth configured in trunk mode.
Added support to SKB frag_list in TX path by skb_linearize'ing such SKBs.

Signed-off-by: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmveth.c | 102 ++++++++++++++++++++++++++++++-------
 drivers/net/ethernet/ibm/ibmveth.h |   1 +
 2 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 72ab7b6..e1e238d 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -46,6 +46,8 @@
 #include <asm/vio.h>
 #include <asm/iommu.h>
 #include <asm/firmware.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
 
 #include "ibmveth.h"
 
@@ -808,8 +810,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 
 	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
 
-	if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) &&
-	    !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) &&
+	if (ret == H_SUCCESS &&
 	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
 		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
 					 set_attr, &ret_attr);
@@ -1040,6 +1041,15 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 	dma_addr_t dma_addr;
 	unsigned long mss = 0;
 
+	/* veth doesn't handle frag_list, so linearize the skb.
+	 * When GRO is enabled SKB's can have frag_list.
+	 */
+	if (adapter->is_active_trunk &&
+	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+
 	/*
 	 * veth handles a maximum of 6 segments including the header, so
 	 * we have to linearize the skb if there are more than this.
@@ -1064,9 +1074,6 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 
 	desc_flags = IBMVETH_BUF_VALID;
 
-	if (skb_is_gso(skb) && adapter->fw_large_send_support)
-		desc_flags |= IBMVETH_BUF_LRG_SND;
-
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		unsigned char *buf = skb_transport_header(skb) +
 						skb->csum_offset;
@@ -1076,6 +1083,9 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		/* Need to zero out the checksum */
 		buf[0] = 0;
 		buf[1] = 0;
+
+		if (skb_is_gso(skb) && adapter->fw_large_send_support)
+			desc_flags |= IBMVETH_BUF_LRG_SND;
 	}
 
 retry_bounce:
@@ -1128,7 +1138,7 @@ retry_bounce:
 		descs[i+1].fields.address = dma_addr;
 	}
 
-	if (skb_is_gso(skb)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		if (adapter->fw_large_send_support) {
 			mss = (unsigned long)skb_shinfo(skb)->gso_size;
 			adapter->tx_large_packets++;
@@ -1232,6 +1242,66 @@ static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
 	}
 }
 
+static void ibmveth_rx_csum_helper(struct sk_buff *skb,
+				   struct ibmveth_adapter *adapter)
+{
+	struct iphdr *iph = NULL;
+	struct ipv6hdr *iph6 = NULL;
+	__be16 skb_proto = 0;
+	u16 iphlen = 0;
+	u16 iph_proto = 0;
+	u16 tcphdrlen = 0;
+
+	skb_proto = be16_to_cpu(skb->protocol);
+
+	if (skb_proto == ETH_P_IP) {
+		iph = (struct iphdr *)skb->data;
+
+		/* If the IP checksum is not offloaded and if the packet
+		 *  is large send, the checksum must be rebuilt.
+		 */
+		if (iph->check == 0xffff) {
+			iph->check = 0;
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+		}
+
+		iphlen = iph->ihl * 4;
+		iph_proto = iph->protocol;
+	} else if (skb_proto == ETH_P_IPV6) {
+		iph6 = (struct ipv6hdr *)skb->data;
+		iphlen = sizeof(struct ipv6hdr);
+		iph_proto = iph6->nexthdr;
+	}
+
+	/* In OVS environment, when a flow is not cached, specifically for a
+	 * new TCP connection, the first (SYN) packet information is passed up
+	 * the user space for finding a flow. During this process, OVS computes
+	 * checksum on the packet when CHECKSUM_PARTIAL flag is set.
+	 * Given that we zeroed out TCP checksum field in transmit path as we
+	 * set "no checksum bit", OVS computed checksum will be incorrect w/o
+	 * TCP pseudo checksum in the packet.
+	 * So, re-compute TCP pseudo header checksum.
+	 */
+	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
+		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
+
+		tcphdrlen = skb->len - iphlen;
+
+		if (skb_proto == ETH_P_IP)
+			tcph->check = ~csum_tcpudp_magic(iph->saddr,
+					iph->daddr, tcphdrlen, iph_proto, 0);
+		else if (skb_proto == ETH_P_IPV6)
+			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
+					&iph6->daddr, tcphdrlen, iph_proto, 0);
+
+		/* Setup SKB fields for checksum offload */
+		skb_partial_csum_set(skb, iphlen,
+				     offsetof(struct tcphdr, check));
+		skb_reset_network_header(skb);
+	}
+}
+
 static int ibmveth_poll(struct napi_struct *napi, int budget)
 {
 	struct ibmveth_adapter *adapter =
@@ -1239,7 +1309,6 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 	struct net_device *netdev = adapter->netdev;
 	int frames_processed = 0;
 	unsigned long lpar_rc;
-	struct iphdr *iph;
 	u16 mss = 0;
 
 restart_poll:
@@ -1297,17 +1366,7 @@ restart_poll:
 
 			if (csum_good) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				if (be16_to_cpu(skb->protocol) == ETH_P_IP) {
-					iph = (struct iphdr *)skb->data;
-
-					/* If the IP checksum is not offloaded and if the packet
-					 *  is large send, the checksum must be rebuilt.
-					 */
-					if (iph->check == 0xffff) {
-						iph->check = 0;
-						iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-					}
-				}
+				ibmveth_rx_csum_helper(skb, adapter);
 			}
 
 			if (length > netdev->mtu + ETH_HLEN) {
@@ -1626,6 +1685,13 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		netdev->hw_features |= NETIF_F_TSO;
 	}
 
+	adapter->is_active_trunk = false;
+	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
+		adapter->is_active_trunk = true;
+		netdev->hw_features |= NETIF_F_FRAGLIST;
+		netdev->features |= NETIF_F_FRAGLIST;
+	}
+
 	netdev->min_mtu = IBMVETH_MIN_MTU;
 	netdev->max_mtu = ETH_MAX_MTU;
 
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 7acda04..de6e381 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -157,6 +157,7 @@ struct ibmveth_adapter {
     int pool_config;
     int rx_csum;
     int large_send;
+    bool is_active_trunk;
     void *bounce_buffer;
     dma_addr_t bounce_buffer_dma;
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA.
  2017-04-07  9:57 [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA Sivakumar Krishnasamy
@ 2017-04-08 18:45 ` David Miller
  2017-04-11  4:23   ` Sivakumar Krishnasamy
  2017-05-19  9:30 ` [PATCH net-next v2] " Sivakumar Krishnasamy
  1 sibling, 1 reply; 5+ messages in thread
From: David Miller @ 2017-04-08 18:45 UTC (permalink / raw)
  To: ksiva
  Cc: netdev, tlfalcon, benh, paulus, mpe, linuxppc-dev, linux-kernel,
	brking, seroyer

From: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
Date: Fri,  7 Apr 2017 05:57:59 -0400

> Enable largesend and checksum offload for ibmveth configured in trunk mode.
> Added support to SKB frag_list in TX path by skb_linearize'ing such SKBs.
> 
> Signed-off-by: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>

Why is linearization necessary?

It would seem that the gains you get from GRO are nullified by
linearizing the SKB and thus copying all the data around and
allocating buffers.

Finally, all of that new checksumming stuff looks extremely
suspicious.  You have to explain why that is happening and why it
isn't because this driver is doing something incorrectly.

Thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA.
  2017-04-08 18:45 ` David Miller
@ 2017-04-11  4:23   ` Sivakumar Krishnasamy
  0 siblings, 0 replies; 5+ messages in thread
From: Sivakumar Krishnasamy @ 2017-04-11  4:23 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, tlfalcon, benh, paulus, mpe, linuxppc-dev, linux-kernel,
	brking, seroyer

Re-sending as my earlier response had some HTML subparts.

Let me give some background before I answer your queries.

In IBM PowerVM environment, ibmveth driver supports largesend and 
checksum offload today, but only for virtual ethernet adapters (VEA) 
which are not configured in "Trunk mode".  In trunk mode, one cannot 
enable checksum and largesend offload capabilities. Without these 
offloads enabled, the performance numbers are not good. This patch is to 
enable these offloads for "Trunk" VEAs.

The following shows a typical configuration for network packet flow, 
when VMs in the PowerVM server have their network virtualized and 
communicate to external world.

         VM (ibmveth) <=> PowerVM Hypervisor <=>  PowerVM I/O Server VM 
( ibmveth in "Trunk mode" <=> OVS <=> Physical NIC ) <=>  External Network

As you can see the packets originating in VM will travel through local 
ibmveth driver and then to PowerVM Hypervisor, then it gets delivered to 
ibmveth driver configured in "Trunk" mode in I/O Server, which is then 
bridged by OVS to external network via Physical NIC.  To have largesend 
and checksum offload enabled end to end, from VM up to Physical NIC, 
ibmveth needs to support these offload capabilities when configured in 
"Trunk" mode too.

Before this patch, when a VM communicates with external network (in a 
configuration similar to above), throughput numbers were not so good 
(~1.5 Gbps) and with the patch, I see ~9.4 Gbps throughput for a 10G NIC 
(iperf used for measurements).

On 4/9/2017 12:15 AM, David Miller wrote:
> From: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
> Date: Fri,  7 Apr 2017 05:57:59 -0400
>
>> Enable largesend and checksum offload for ibmveth configured in trunk mode.
>> Added support to SKB frag_list in TX path by skb_linearize'ing such SKBs.
>>
>> Signed-off-by: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
>
> Why is linearization necessary?
>
> It would seem that the gains you get from GRO are nullified by
> linearizing the SKB and thus copying all the data around and
> allocating buffers.
>
When Physical NIC has GRO enabled and when OVS bridges these packets, 
OVS vport send code will end up calling dev_queue_xmit, which in turn 
calls validate_xmit_skb.

validate_xmit_skb has the below code snippet,

     if (netif_needs_gso(skb, features)) {
         struct sk_buff *segs;

         segs = skb_gso_segment(skb, features);     <=== Segments the 
GSO packet into MTU sized segments.

When the OVS outbound vport is ibmveth, netif_needs_gso returns 
positively if the SKB has a frag_list and if the driver doesn't support 
the same (NETIF_F_FRAGLIST feature).  So all the packets received by 
ibmveth are of MSS size (or lesser) due to the above code.

On a 10G physical NIC, the maximum throughput achieved was 2.2 Gbps due 
to the above segmentation in validate_xmit_skb. With the patch to 
linearize the SKB, the throughput increased to 9 Gbps (and ibmveth 
received packets without being segmented). This is ~4X improvement even 
though we end up allocating buffers and copying data.

> Finally, all of that new checksumming stuff looks extremely
> suspicious.  You have to explain why that is happening and why it
> isn't because this driver is doing something incorrectly.
>
> Thanks.
>
We are now enabling support for OVS and improving bridging performance 
in IBM's PowerVM environment, which brings in these new offload 
requirements for ibmveth driver configured in Trunk mode.

Please let me know if you need more details.

Regards,
Siva K

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next v2] ibmveth: Support to enable LSO/CSO for Trunk VEA.
  2017-04-07  9:57 [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA Sivakumar Krishnasamy
  2017-04-08 18:45 ` David Miller
@ 2017-05-19  9:30 ` Sivakumar Krishnasamy
  2017-05-21 17:29   ` David Miller
  1 sibling, 1 reply; 5+ messages in thread
From: Sivakumar Krishnasamy @ 2017-05-19  9:30 UTC (permalink / raw)
  To: netdev
  Cc: tlfalcon, benh, paulus, mpe, linuxppc-dev, linux-kernel, brking,
	seroyer, bryantly, Sivakumar Krishnasamy

Current largesend and checksum offload feature in ibmveth driver,
 - Source VM sends the TCP packets with ip_summed field set as
   CHECKSUM_PARTIAL and TCP pseudo header checksum is placed in
   checksum field
 - CHECKSUM_PARTIAL flag in SKB will enable ibmveth driver to mark
   "no checksum" and "checksum good" bits in transmit buffer descriptor
   before the packet is delivered to pseries PowerVM Hypervisor
 - If ibmveth has largesend capability enabled, transmit buffer descriptors
   are market accordingly before packet is delivered to Hypervisor
   (along with mss value for packets with length > MSS)
 - Destination VM's ibmveth driver receives the packet with "checksum good"
   bit set and so, SKB's ip_summed field is set with CHECKSUM_UNNECESSARY
 - If "largesend" bit was on, mss value is copied from receive descriptor
   into SKB's gso_size and other flags are appropriately set for
   packets > MSS size
 - The packet is now successfully delivered up the stack in destination VM

The offloads described above works fine for TCP communication among VMs in
the same pseries server ( VM A <=> PowerVM Hypervisor <=> VM B )

We are now enabling support for OVS in pseries PowerVM environment. One of
our requirements is to have ibmveth driver configured in "Trunk" mode, when
they are used with OVS. This is because, PowerVM Hypervisor will no more
bridge the packets between VMs, instead the packets are delivered to
IO Server which hosts OVS to bridge them between VMs or to external
networks (flow shown below),
  VM A <=> PowerVM Hypervisor <=> IO Server(OVS) <=> PowerVM Hypervisor
                                                                   <=> VM B
In "IO server" the packet is received by inbound Trunk ibmveth and then
delivered to OVS, which is then bridged to outbound Trunk ibmveth (shown
below),
        Inbound Trunk ibmveth <=> OVS <=> Outbound Trunk ibmveth

In this model, we hit the following issues which impacted the VM
communication performance,

 - Issue 1: ibmveth doesn't support largesend and checksum offload features
   when configured as "Trunk". Driver has explicit checks to prevent
   enabling these offloads.

 - Issue 2: SYN packet drops seen at destination VM. When the packet
   originates, it has CHECKSUM_PARTIAL flag set and as it gets delivered to
   IO server's inbound Trunk ibmveth, on validating "checksum good" bits
   in ibmveth receive routine, SKB's ip_summed field is set with
   CHECKSUM_UNNECESSARY flag. This packet is then bridged by OVS (or Linux
   Bridge) and delivered to outbound Trunk ibmveth. At this point the
   outbound ibmveth transmit routine will not set "no checksum" and
   "checksum good" bits in transmit buffer descriptor, as it does so only
   when the ip_summed field is CHECKSUM_PARTIAL. When this packet gets
   delivered to destination VM, TCP layer receives the packet with checksum
   value of 0 and with no checksum related flags in ip_summed field. This
   leads to packet drops. So, TCP connections never goes through fine.

 - Issue 3: First packet of a TCP connection will be dropped, if there is
   no OVS flow cached in datapath. OVS while trying to identify the flow,
   computes the checksum. The computed checksum will be invalid at the
   receiving end, as ibmveth transmit routine zeroes out the pseudo
   checksum value in the packet. This leads to packet drop.

 - Issue 4: ibmveth driver doesn't have support for SKB's with frag_list.
   When Physical NIC has GRO enabled and when OVS bridges these packets,
   OVS vport send code will end up calling dev_queue_xmit, which in turn
   calls validate_xmit_skb.
   In validate_xmit_skb routine, the larger packets will get segmented into
   MSS sized segments, if SKB has a frag_list and if the driver to which
   they are delivered to doesn't support NETIF_F_FRAGLIST feature.

This patch addresses the above four issues, thereby enabling end to end
largesend and checksum offload support for better performance.

 - Fix for Issue 1 : Remove checks which prevent enabling TCP largesend and
   checksum offloads.
 - Fix for Issue 2 : When ibmveth receives a packet with "checksum good"
   bit set and if its configured in Trunk mode, set appropriate SKB fields
   using skb_partial_csum_set (ip_summed field is set with
   CHECKSUM_PARTIAL)
 - Fix for Issue 3: Recompute the pseudo header checksum before sending the
   SKB up the stack.
 - Fix for Issue 4: Linearize the SKBs with frag_list. Though we end up
   allocating buffers and copying data, this fix gives
   upto 4X throughput increase.

Note: All these fixes need to be dropped together as fixing just one of
them will lead to other issues immediately (especially for Issues 1,2 & 3).

Signed-off-by: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmveth.c | 107 ++++++++++++++++++++++++++++++-------
 drivers/net/ethernet/ibm/ibmveth.h |   1 +
 2 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 72ab7b6..9a74c4e 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -46,6 +46,8 @@
 #include <asm/vio.h>
 #include <asm/iommu.h>
 #include <asm/firmware.h>
+#include <net/tcp.h>
+#include <net/ip6_checksum.h>
 
 #include "ibmveth.h"
 
@@ -808,8 +810,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 
 	ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr);
 
-	if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) &&
-	    !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) &&
+	if (ret == H_SUCCESS &&
 	    (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
 		ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
 					 set_attr, &ret_attr);
@@ -1040,6 +1041,15 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 	dma_addr_t dma_addr;
 	unsigned long mss = 0;
 
+	/* veth doesn't handle frag_list, so linearize the skb.
+	 * When GRO is enabled SKB's can have frag_list.
+	 */
+	if (adapter->is_active_trunk &&
+	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+
 	/*
 	 * veth handles a maximum of 6 segments including the header, so
 	 * we have to linearize the skb if there are more than this.
@@ -1064,9 +1074,6 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 
 	desc_flags = IBMVETH_BUF_VALID;
 
-	if (skb_is_gso(skb) && adapter->fw_large_send_support)
-		desc_flags |= IBMVETH_BUF_LRG_SND;
-
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		unsigned char *buf = skb_transport_header(skb) +
 						skb->csum_offset;
@@ -1076,6 +1083,9 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		/* Need to zero out the checksum */
 		buf[0] = 0;
 		buf[1] = 0;
+
+		if (skb_is_gso(skb) && adapter->fw_large_send_support)
+			desc_flags |= IBMVETH_BUF_LRG_SND;
 	}
 
 retry_bounce:
@@ -1128,7 +1138,7 @@ retry_bounce:
 		descs[i+1].fields.address = dma_addr;
 	}
 
-	if (skb_is_gso(skb)) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		if (adapter->fw_large_send_support) {
 			mss = (unsigned long)skb_shinfo(skb)->gso_size;
 			adapter->tx_large_packets++;
@@ -1232,6 +1242,71 @@ static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
 	}
 }
 
+static void ibmveth_rx_csum_helper(struct sk_buff *skb,
+				   struct ibmveth_adapter *adapter)
+{
+	struct iphdr *iph = NULL;
+	struct ipv6hdr *iph6 = NULL;
+	__be16 skb_proto = 0;
+	u16 iphlen = 0;
+	u16 iph_proto = 0;
+	u16 tcphdrlen = 0;
+
+	skb_proto = be16_to_cpu(skb->protocol);
+
+	if (skb_proto == ETH_P_IP) {
+		iph = (struct iphdr *)skb->data;
+
+		/* If the IP checksum is not offloaded and if the packet
+		 *  is large send, the checksum must be rebuilt.
+		 */
+		if (iph->check == 0xffff) {
+			iph->check = 0;
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+		}
+
+		iphlen = iph->ihl * 4;
+		iph_proto = iph->protocol;
+	} else if (skb_proto == ETH_P_IPV6) {
+		iph6 = (struct ipv6hdr *)skb->data;
+		iphlen = sizeof(struct ipv6hdr);
+		iph_proto = iph6->nexthdr;
+	}
+
+	/* In OVS environment, when a flow is not cached, specifically for a
+	 * new TCP connection, the first packet information is passed up
+	 * the user space for finding a flow. During this process, OVS computes
+	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
+	 *
+	 * Given that we zeroed out TCP checksum field in transmit path
+	 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
+	 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
+	 * in the packet. This leads to OVS dropping the packet and hence
+	 * TCP retransmissions are seen.
+	 *
+	 * So, re-compute TCP pseudo header checksum.
+	 */
+	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
+		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
+
+		tcphdrlen = skb->len - iphlen;
+
+		/* Recompute TCP pseudo header checksum */
+		if (skb_proto == ETH_P_IP)
+			tcph->check = ~csum_tcpudp_magic(iph->saddr,
+					iph->daddr, tcphdrlen, iph_proto, 0);
+		else if (skb_proto == ETH_P_IPV6)
+			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
+					&iph6->daddr, tcphdrlen, iph_proto, 0);
+
+		/* Setup SKB fields for checksum offload */
+		skb_partial_csum_set(skb, iphlen,
+				     offsetof(struct tcphdr, check));
+		skb_reset_network_header(skb);
+	}
+}
+
 static int ibmveth_poll(struct napi_struct *napi, int budget)
 {
 	struct ibmveth_adapter *adapter =
@@ -1239,7 +1314,6 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 	struct net_device *netdev = adapter->netdev;
 	int frames_processed = 0;
 	unsigned long lpar_rc;
-	struct iphdr *iph;
 	u16 mss = 0;
 
 restart_poll:
@@ -1297,17 +1371,7 @@ restart_poll:
 
 			if (csum_good) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				if (be16_to_cpu(skb->protocol) == ETH_P_IP) {
-					iph = (struct iphdr *)skb->data;
-
-					/* If the IP checksum is not offloaded and if the packet
-					 *  is large send, the checksum must be rebuilt.
-					 */
-					if (iph->check == 0xffff) {
-						iph->check = 0;
-						iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-					}
-				}
+				ibmveth_rx_csum_helper(skb, adapter);
 			}
 
 			if (length > netdev->mtu + ETH_HLEN) {
@@ -1626,6 +1690,13 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		netdev->hw_features |= NETIF_F_TSO;
 	}
 
+	adapter->is_active_trunk = false;
+	if (ret == H_SUCCESS && (ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK)) {
+		adapter->is_active_trunk = true;
+		netdev->hw_features |= NETIF_F_FRAGLIST;
+		netdev->features |= NETIF_F_FRAGLIST;
+	}
+
 	netdev->min_mtu = IBMVETH_MIN_MTU;
 	netdev->max_mtu = ETH_MAX_MTU;
 
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index ed8780c..01c587f 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -156,6 +156,7 @@ struct ibmveth_adapter {
     int pool_config;
     int rx_csum;
     int large_send;
+    bool is_active_trunk;
     void *bounce_buffer;
     dma_addr_t bounce_buffer_dma;
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v2] ibmveth: Support to enable LSO/CSO for Trunk VEA.
  2017-05-19  9:30 ` [PATCH net-next v2] " Sivakumar Krishnasamy
@ 2017-05-21 17:29   ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2017-05-21 17:29 UTC (permalink / raw)
  To: ksiva
  Cc: netdev, tlfalcon, benh, paulus, mpe, linuxppc-dev, linux-kernel,
	brking, seroyer, bryantly

From: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>
Date: Fri, 19 May 2017 05:30:38 -0400

 ...
> Signed-off-by: Sivakumar Krishnasamy <ksiva@linux.vnet.ibm.com>

Applied, thanks for the more detailed commit message.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-05-21 17:29 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-07  9:57 [PATCH] ibmveth: Support to enable LSO/CSO for Trunk VEA Sivakumar Krishnasamy
2017-04-08 18:45 ` David Miller
2017-04-11  4:23   ` Sivakumar Krishnasamy
2017-05-19  9:30 ` [PATCH net-next v2] " Sivakumar Krishnasamy
2017-05-21 17:29   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).