All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] xen-netfront: Add support for IPv6 offloads
@ 2013-11-05 11:59 Paul Durrant
  2013-11-07 10:48 ` Ian Campbell
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Durrant @ 2013-11-05 11:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Boris Ostrovsky, Paul Durrant, David Vrabel

This patch adds support for IPv6 checksum offload and GSO when those
fetaures are available in the backend.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netfront.c |  263 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 231 insertions(+), 32 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index dd1011e..27212d8 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
-		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+			          XEN_NETIF_GSO_TYPE_TCPV6 :
+			          XEN_NETIF_GSO_TYPE_TCPV4;
 		gso->u.gso.pad = 0;
 		gso->u.gso.features = 0;
 
@@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 		if (net_ratelimit())
 			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type =
+		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+		SKB_GSO_TCPV4 :
+		SKB_GSO_TCPV6;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np,
 	return cons;
 }
 
-static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
 {
-	struct iphdr *iph;
-	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
-
-	/*
-	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-	 * recalculate the partial checksum.
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		struct netfront_info *np = netdev_priv(dev);
-		np->rx_gso_checksum_fixup++;
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
+	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+		/* If we need to pullup then pullup to the max, so we
+		 * won't need to do it again.
+		 */
+		int target = min_t(int, skb->len, MAX_TCP_HEADER);
+		__pskb_pull_tail(skb, target - skb_headlen(skb));
 	}
+}
 
-	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+static int checksum_setup_ip(struct sk_buff *skb, int recalculate_partial_csum)
+{
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
+	int err = -EPROTO;
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
+	off = sizeof(struct iphdr);
 
-	iph = (void *)skb->data;
+	header_size = skb->network_header + off + MAX_IPOPTLEN;
+	maybe_pull_tail(skb, header_size);
+
+	off = iph->ihl * 4;
 
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
 		if (net_ratelimit())
-			pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+			pr_err("Attempting to checksum a non-TCP/UDP packet, "
+			       "dropping a protocol %d packet\n",
 			       iph->protocol);
 		goto out;
 	}
@@ -922,6 +937,158 @@ out:
 	return err;
 }
 
+static int checksum_setup_ipv6(struct sk_buff *skb, int recalculate_partial_csum)
+{
+	int err = -EPROTO;
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	maybe_pull_tail(skb, header_size);
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += (hp->hdrlen+2)<<2;
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		if (net_ratelimit())
+			pr_err("Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		if (net_ratelimit())
+			pr_err("Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
+			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
+			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			pr_err("Attempting to checksum a non-TCP/UDP packet, "
+			       "dropping a protocol %d packet\n",
+			       nexthdr);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+{
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/*
+	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		struct netfront_info *np = netdev_priv(dev);
+		np->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		err = checksum_setup_ip(skb, recalculate_partial_csum);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = checksum_setup_ipv6(skb, recalculate_partial_csum);
+
+	return err;
+}
+
 static int handle_incoming_queue(struct net_device *dev,
 				 struct sk_buff_head *rxq)
 {
@@ -1232,6 +1399,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_SG;
 	}
 
+	if (features & NETIF_F_IPV6_CSUM) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-ipv6-csum-offload", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_IPV6_CSUM;
+	}
+
 	if (features & NETIF_F_TSO) {
 		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
 				 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1241,6 +1417,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_TSO;
 	}
 
+	if (features & NETIF_F_TSO6) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-gso-tcpv6", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_TSO6;
+	}
+
 	return features;
 }
 
@@ -1373,7 +1558,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				  NETIF_F_GSO_ROBUST;
-	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+	netdev->hw_features	= NETIF_F_SG | 
+		                  NETIF_F_IPV6_CSUM |
+		                  NETIF_F_TSO | NETIF_F_TSO6;
 
 	/*
          * Assume that all hw features are available for now. This set
@@ -1751,6 +1938,18 @@ again:
 		goto abort_transaction;
 	}
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
 		if (err == -EAGAIN)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-05 11:59 [PATCH net-next] xen-netfront: Add support for IPv6 offloads Paul Durrant
@ 2013-11-07 10:48 ` Ian Campbell
  2013-11-07 11:02   ` Paul Durrant
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-11-07 10:48 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:
> This patch adds support for IPv6 checksum offload and GSO when those
> fetaures are available in the backend.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> ---
>  drivers/net/xen-netfront.c |  263 ++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 231 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index dd1011e..27212d8 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		tx->flags |= XEN_NETTXF_extra_info;
>  
>  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> +		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
> +			          XEN_NETIF_GSO_TYPE_TCPV6 :
> +			          XEN_NETIF_GSO_TYPE_TCPV4;
>  		gso->u.gso.pad = 0;
>  		gso->u.gso.features = 0;
>  
> @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
>  		return -EINVAL;
>  	}
>  
> -	/* Currently only TCPv4 S.O. is supported. */
> -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
>  		if (net_ratelimit())
>  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
>  		return -EINVAL;
>  	}
>  
>  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> +	skb_shinfo(skb)->gso_type =
> +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> +		SKB_GSO_TCPV4 :
> +		SKB_GSO_TCPV6;
>  
>  	/* Header must be checked, and gso_segs computed. */
>  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np,
>  	return cons;
>  }
>  
> -static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
>  {
> -	struct iphdr *iph;
> -	int err = -EPROTO;
> -	int recalculate_partial_csum = 0;
> -
> -	/*
> -	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> -	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> -	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> -	 * recalculate the partial checksum.
> -	 */
> -	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> -		struct netfront_info *np = netdev_priv(dev);
> -		np->rx_gso_checksum_fixup++;
> -		skb->ip_summed = CHECKSUM_PARTIAL;
> -		recalculate_partial_csum = 1;
> +	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
> +		/* If we need to pullup then pullup to the max, so we
> +		 * won't need to do it again.
> +		 */
> +		int target = min_t(int, skb->len, MAX_TCP_HEADER);
> +		__pskb_pull_tail(skb, target - skb_headlen(skb));

Should the functions "len" argument not be involved somewhere in this?
What if it is bigger than MAX_TCP_HEADER for example?

>  	}
> +}
>  
> -	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> -	if (skb->ip_summed != CHECKSUM_PARTIAL)
> -		return 0;
> +static int checksum_setup_ip(struct sk_buff *skb, int recalculate_partial_csum)
> +{

> +	struct iphdr *iph = (void *)skb->data;
> +	unsigned int header_size;
> +	unsigned int off;
> +	int err = -EPROTO;
>  
> -	if (skb->protocol != htons(ETH_P_IP))
> -		goto out;
> +	off = sizeof(struct iphdr);
>  
> -	iph = (void *)skb->data;
> +	header_size = skb->network_header + off + MAX_IPOPTLEN;
> +	maybe_pull_tail(skb, header_size);

You never reuse header_size other than setting it and immediately
passing it to maybe_pull_tail, it could be inlined into the function
call?

(I found it confusing because you reset it in the recalculate_partial
rather than extending it which is what I expected)

maybe_pull_tail doesn't guarantee to pull up to the length of the given
parameter, e.g. if it is more then skb->len. Which I think means you
need some other explicit skb len checks around the place, don't you?
Otherwise you risk running past the end of the skb for a malformed
frame.

> +
> +	off = iph->ihl * 4;
>  
>  	switch (iph->protocol) {
>  	case IPPROTO_TCP:
> -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> +		if (!skb_partial_csum_set(skb, off,
>  					  offsetof(struct tcphdr, check)))
>  			goto out;
>  
>  		if (recalculate_partial_csum) {
>  			struct tcphdr *tcph = tcp_hdr(skb);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct tcphdr);
> +			maybe_pull_tail(skb, header_size);
> +
>  			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
> -							 skb->len - iph->ihl*4,
> +							 skb->len - off,
>  							 IPPROTO_TCP, 0);
>  		}
>  		break;
>  	case IPPROTO_UDP:
> -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> +		if (!skb_partial_csum_set(skb, off,
>  					  offsetof(struct udphdr, check)))
>  			goto out;
>  
>  		if (recalculate_partial_csum) {
>  			struct udphdr *udph = udp_hdr(skb);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct udphdr);
> +			maybe_pull_tail(skb, header_size);
> +
>  			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
> -							 skb->len - iph->ihl*4,
> +							 skb->len - off,
>  							 IPPROTO_UDP, 0);
>  		}
>  		break;
>  	default:
>  		if (net_ratelimit())
> -			pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
> +			pr_err("Attempting to checksum a non-TCP/UDP packet, "
> +			       "dropping a protocol %d packet\n",

I think Linux's coding style explicitly relaxes the 80-column limit for
string literals.

>  			       iph->protocol);
>  		goto out;
>  	}
> @@ -922,6 +937,158 @@ out:
>  	return err;
>  }
>  
> +static int checksum_setup_ipv6(struct sk_buff *skb, int recalculate_partial_csum)
> +{
> +	int err = -EPROTO;
> +	struct ipv6hdr *ipv6h = (void *)skb->data;
> +	u8 nexthdr;
> +	unsigned int header_size;
> +	unsigned int off;
> +	bool fragment;
> +	bool done;
> +
> +	done = false;
> +
> +	off = sizeof(struct ipv6hdr);
> +
> +	header_size = skb->network_header + off;
> +	maybe_pull_tail(skb, header_size);

Same comment about skb lengths?

> +
> +	nexthdr = ipv6h->nexthdr;
> +
> +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
> +	       !done) {
> +		switch (nexthdr) {
> +		case IPPROTO_DSTOPTS:
> +		case IPPROTO_HOPOPTS:
> +		case IPPROTO_ROUTING: {
> +			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct ipv6_opt_hdr);
> +			maybe_pull_tail(skb, header_size);
> +
> +			nexthdr = hp->nexthdr;
> +			off += ipv6_optlen(hp);
> +			break;
> +		}
> +		case IPPROTO_AH: {
> +			struct ip_auth_hdr *hp = (void *)(skb->data + off);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct ip_auth_hdr);
> +			maybe_pull_tail(skb, header_size);
> +
> +			nexthdr = hp->nexthdr;
> +			off += (hp->hdrlen+2)<<2;

I wonder if the core header would welcome a ipv6_ahlen cf optlen?

> +			break;
> +		}
> +		case IPPROTO_FRAGMENT:
> +			fragment = true;
> +			/* fall through */
> +		default:
> +			done = true;
> +			break;
> +		}
> +	}
> +
> +	if (!done) {
> +		if (net_ratelimit())
> +			pr_err("Failed to parse packet header\n");

Is it a requirement of the protocol that the list of IPPROTO_* we handle
must be followed by some other header? Do we really care or is it more
the upper stacks problem? Presumably it can get similarly malformed
packets off the wire?

Also you can use net_err_ratelimited.

> +		goto out;
> +	}
> +
> +	if (fragment) {
> +		if (net_ratelimit())
> +			pr_err("Packet is a fragment!\n");

Is that a bad thing?

> +		goto out;
> +	}
> +
> +	switch (nexthdr) {
> +	case IPPROTO_TCP:
> +		if (!skb_partial_csum_set(skb, off,
> +					  offsetof(struct tcphdr, check)))
> +			goto out;
> +
> +		if (recalculate_partial_csum) {
> +			struct tcphdr *tcph = tcp_hdr(skb);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct tcphdr);
> +			maybe_pull_tail(skb, header_size);
> +
> +			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> +						       &ipv6h->daddr,
> +						       skb->len - off,
> +						       IPPROTO_TCP, 0);
> +		}
> +		break;
> +	case IPPROTO_UDP:
> +		if (!skb_partial_csum_set(skb, off,
> +					  offsetof(struct udphdr, check)))
> +			goto out;
> +
> +		if (recalculate_partial_csum) {
> +			struct udphdr *udph = udp_hdr(skb);
> +
> +			header_size = skb->network_header +
> +				off +
> +				sizeof(struct udphdr);
> +			maybe_pull_tail(skb, header_size);
> +
> +			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> +						       &ipv6h->daddr,
> +						       skb->len - off,
> +						       IPPROTO_UDP, 0);
> +		}
> +		break;
> +	default:
> +		if (net_ratelimit())
> +			pr_err("Attempting to checksum a non-TCP/UDP packet, "
> +			       "dropping a protocol %d packet\n",
> +			       nexthdr);
> +		goto out;
> +	}
> +
> +	err = 0;
> +
> +out:
> +	return err;
> +}
> +
> +static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> +{
> +	int err = -EPROTO;
> +	int recalculate_partial_csum = 0;
> +
> +	/*
> +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> +	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> +	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> +	 * recalculate the partial checksum.
> +	 */
> +	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {

Is it worth making this ipv4 only. Assuming that anything which adds v6
support has already fixed this bug? That would save time in
checksum_setup_ipv6.

Speaking of which can checksum_setup_* be shared with netfront somehow?
Or maybe even put in common code? (Perhaps as a future cleanup)

> +		struct netfront_info *np = netdev_priv(dev);
> +		np->rx_gso_checksum_fixup++;
> +		skb->ip_summed = CHECKSUM_PARTIAL;
> +		recalculate_partial_csum = 1;
> +	}
> +
> +	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> +		return 0;
> +
> +	if (skb->protocol == htons(ETH_P_IP))
> +		err = checksum_setup_ip(skb, recalculate_partial_csum);
> +	else if (skb->protocol == htons(ETH_P_IPV6))
> +		err = checksum_setup_ipv6(skb, recalculate_partial_csum);
> +
> +	return err;
> +}
> +
>  static int handle_incoming_queue(struct net_device *dev,
[...]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-07 10:48 ` Ian Campbell
@ 2013-11-07 11:02   ` Paul Durrant
  2013-11-07 11:49     ` Ian Campbell
  2013-11-15 11:19     ` Paul Durrant
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Durrant @ 2013-11-07 11:02 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

> -----Original Message-----
> From: Ian Campbell
> Sent: 07 November 2013 10:49
> To: Paul Durrant
> Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel
> Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> IPv6 offloads
> 
> On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:
> > This patch adds support for IPv6 checksum offload and GSO when those
> > fetaures are available in the backend.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > Cc: David Vrabel <david.vrabel@citrix.com>
> > ---
> >  drivers/net/xen-netfront.c |  263
> ++++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 231 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > index dd1011e..27212d8 100644
> > --- a/drivers/net/xen-netfront.c
> > +++ b/drivers/net/xen-netfront.c
> > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb,
> struct net_device *dev)
> >  		tx->flags |= XEN_NETTXF_extra_info;
> >
> >  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> > -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> > +		gso->u.gso.type = (skb_shinfo(skb)->gso_type &
> SKB_GSO_TCPV6) ?
> > +			          XEN_NETIF_GSO_TYPE_TCPV6 :
> > +			          XEN_NETIF_GSO_TYPE_TCPV4;
> >  		gso->u.gso.pad = 0;
> >  		gso->u.gso.features = 0;
> >
> > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff
> *skb,
> >  		return -EINVAL;
> >  	}
> >
> > -	/* Currently only TCPv4 S.O. is supported. */
> > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> > +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
> >  		if (net_ratelimit())
> >  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
> >  		return -EINVAL;
> >  	}
> >
> >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > +	skb_shinfo(skb)->gso_type =
> > +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> > +		SKB_GSO_TCPV4 :
> > +		SKB_GSO_TCPV6;
> >
> >  	/* Header must be checked, and gso_segs computed. */
> >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct
> netfront_info *np,
> >  	return cons;
> >  }
> >
> > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
> >  {
> > -	struct iphdr *iph;
> > -	int err = -EPROTO;
> > -	int recalculate_partial_csum = 0;
> > -
> > -	/*
> > -	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > -	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > -	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > -	 * recalculate the partial checksum.
> > -	 */
> > -	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> > -		struct netfront_info *np = netdev_priv(dev);
> > -		np->rx_gso_checksum_fixup++;
> > -		skb->ip_summed = CHECKSUM_PARTIAL;
> > -		recalculate_partial_csum = 1;
> > +	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
> > +		/* If we need to pullup then pullup to the max, so we
> > +		 * won't need to do it again.
> > +		 */
> > +		int target = min_t(int, skb->len, MAX_TCP_HEADER);
> > +		__pskb_pull_tail(skb, target - skb_headlen(skb));
> 
> Should the functions "len" argument not be involved somewhere in this?
> What if it is bigger than MAX_TCP_HEADER for example?

Hmm, good point.

> 
> >  	}
> > +}
> >
> > -	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > -	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > -		return 0;
> > +static int checksum_setup_ip(struct sk_buff *skb, int
> recalculate_partial_csum)
> > +{
> 
> > +	struct iphdr *iph = (void *)skb->data;
> > +	unsigned int header_size;
> > +	unsigned int off;
> > +	int err = -EPROTO;
> >
> > -	if (skb->protocol != htons(ETH_P_IP))
> > -		goto out;
> > +	off = sizeof(struct iphdr);
> >
> > -	iph = (void *)skb->data;
> > +	header_size = skb->network_header + off + MAX_IPOPTLEN;
> > +	maybe_pull_tail(skb, header_size);
> 
> You never reuse header_size other than setting it and immediately
> passing it to maybe_pull_tail, it could be inlined into the function
> call?
> 

Yes, it could but I thought this was tidier. It's also the same as the code in netback.

> (I found it confusing because you reset it in the recalculate_partial
> rather than extending it which is what I expected)
> 
> maybe_pull_tail doesn't guarantee to pull up to the length of the given
> parameter, e.g. if it is more then skb->len. Which I think means you
> need some other explicit skb len checks around the place, don't you?
> Otherwise you risk running past the end of the skb for a malformed
> frame.
> 

Yes, that's true. Probably best to have maybe_pull_tail() return a failure if it doesn't managed to pullup the requested amount then.

> > +
> > +	off = iph->ihl * 4;
> >
> >  	switch (iph->protocol) {
> >  	case IPPROTO_TCP:
> > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > +		if (!skb_partial_csum_set(skb, off,
> >  					  offsetof(struct tcphdr, check)))
> >  			goto out;
> >
> >  		if (recalculate_partial_csum) {
> >  			struct tcphdr *tcph = tcp_hdr(skb);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct tcphdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> >  			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph-
> >daddr,
> > -							 skb->len - iph->ihl*4,
> > +							 skb->len - off,
> >  							 IPPROTO_TCP, 0);
> >  		}
> >  		break;
> >  	case IPPROTO_UDP:
> > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > +		if (!skb_partial_csum_set(skb, off,
> >  					  offsetof(struct udphdr, check)))
> >  			goto out;
> >
> >  		if (recalculate_partial_csum) {
> >  			struct udphdr *udph = udp_hdr(skb);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct udphdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> >  			udph->check = ~csum_tcpudp_magic(iph->saddr,
> iph->daddr,
> > -							 skb->len - iph->ihl*4,
> > +							 skb->len - off,
> >  							 IPPROTO_UDP, 0);
> >  		}
> >  		break;
> >  	default:
> >  		if (net_ratelimit())
> > -			pr_err("Attempting to checksum a non-TCP/UDP
> packet, dropping a protocol %d packet\n",
> > +			pr_err("Attempting to checksum a non-TCP/UDP
> packet, "
> > +			       "dropping a protocol %d packet\n",
> 
> I think Linux's coding style explicitly relaxes the 80-column limit for
> string literals.
> 

Ok. Good to know.

> >  			       iph->protocol);
> >  		goto out;
> >  	}
> > @@ -922,6 +937,158 @@ out:
> >  	return err;
> >  }
> >
> > +static int checksum_setup_ipv6(struct sk_buff *skb, int
> recalculate_partial_csum)
> > +{
> > +	int err = -EPROTO;
> > +	struct ipv6hdr *ipv6h = (void *)skb->data;
> > +	u8 nexthdr;
> > +	unsigned int header_size;
> > +	unsigned int off;
> > +	bool fragment;
> > +	bool done;
> > +
> > +	done = false;
> > +
> > +	off = sizeof(struct ipv6hdr);
> > +
> > +	header_size = skb->network_header + off;
> > +	maybe_pull_tail(skb, header_size);
> 
> Same comment about skb lengths?
> 
> > +
> > +	nexthdr = ipv6h->nexthdr;
> > +
> > +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len))
> &&
> > +	       !done) {
> > +		switch (nexthdr) {
> > +		case IPPROTO_DSTOPTS:
> > +		case IPPROTO_HOPOPTS:
> > +		case IPPROTO_ROUTING: {
> > +			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct ipv6_opt_hdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> > +			nexthdr = hp->nexthdr;
> > +			off += ipv6_optlen(hp);
> > +			break;
> > +		}
> > +		case IPPROTO_AH: {
> > +			struct ip_auth_hdr *hp = (void *)(skb->data + off);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct ip_auth_hdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> > +			nexthdr = hp->nexthdr;
> > +			off += (hp->hdrlen+2)<<2;
> 
> I wonder if the core header would welcome a ipv6_ahlen cf optlen?
> 

Yes, it would be nicer wouldn't it.

> > +			break;
> > +		}
> > +		case IPPROTO_FRAGMENT:
> > +			fragment = true;
> > +			/* fall through */
> > +		default:
> > +			done = true;
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!done) {
> > +		if (net_ratelimit())
> > +			pr_err("Failed to parse packet header\n");
> 
> Is it a requirement of the protocol that the list of IPPROTO_* we handle
> must be followed by some other header? Do we really care or is it more
> the upper stacks problem? Presumably it can get similarly malformed
> packets off the wire?

I'd have to check but I'm pretty sure none of the protos we handle can be terminating so if done is not set then the packet is malformed.

> 
> Also you can use net_err_ratelimited.

Ok.

> 
> > +		goto out;
> > +	}
> > +
> > +	if (fragment) {
> > +		if (net_ratelimit())
> > +			pr_err("Packet is a fragment!\n");
> 
> Is that a bad thing?
> 

Well, you can't do TCP or UDP checksum offload on a fragment, can you?

> > +		goto out;
> > +	}
> > +
> > +	switch (nexthdr) {
> > +	case IPPROTO_TCP:
> > +		if (!skb_partial_csum_set(skb, off,
> > +					  offsetof(struct tcphdr, check)))
> > +			goto out;
> > +
> > +		if (recalculate_partial_csum) {
> > +			struct tcphdr *tcph = tcp_hdr(skb);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct tcphdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> > +			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > +						       &ipv6h->daddr,
> > +						       skb->len - off,
> > +						       IPPROTO_TCP, 0);
> > +		}
> > +		break;
> > +	case IPPROTO_UDP:
> > +		if (!skb_partial_csum_set(skb, off,
> > +					  offsetof(struct udphdr, check)))
> > +			goto out;
> > +
> > +		if (recalculate_partial_csum) {
> > +			struct udphdr *udph = udp_hdr(skb);
> > +
> > +			header_size = skb->network_header +
> > +				off +
> > +				sizeof(struct udphdr);
> > +			maybe_pull_tail(skb, header_size);
> > +
> > +			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > +						       &ipv6h->daddr,
> > +						       skb->len - off,
> > +						       IPPROTO_UDP, 0);
> > +		}
> > +		break;
> > +	default:
> > +		if (net_ratelimit())
> > +			pr_err("Attempting to checksum a non-TCP/UDP
> packet, "
> > +			       "dropping a protocol %d packet\n",
> > +			       nexthdr);
> > +		goto out;
> > +	}
> > +
> > +	err = 0;
> > +
> > +out:
> > +	return err;
> > +}
> > +
> > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > +{
> > +	int err = -EPROTO;
> > +	int recalculate_partial_csum = 0;
> > +
> > +	/*
> > +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > +	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > +	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > +	 * recalculate the partial checksum.
> > +	 */
> > +	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> 
> Is it worth making this ipv4 only. Assuming that anything which adds v6
> support has already fixed this bug? That would save time in
> checksum_setup_ipv6.
> 

That's true.

> Speaking of which can checksum_setup_* be shared with netfront
> somehow?
> Or maybe even put in common code? (Perhaps as a future cleanup)
> 

I think it really should be in common code, but I think that would be better handled as a separate patch to remove the duplication.

Thanks for the comprehensive review!

  Paul

> > +		struct netfront_info *np = netdev_priv(dev);
> > +		np->rx_gso_checksum_fixup++;
> > +		skb->ip_summed = CHECKSUM_PARTIAL;
> > +		recalculate_partial_csum = 1;
> > +	}
> > +
> > +	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > +		return 0;
> > +
> > +	if (skb->protocol == htons(ETH_P_IP))
> > +		err = checksum_setup_ip(skb, recalculate_partial_csum);
> > +	else if (skb->protocol == htons(ETH_P_IPV6))
> > +		err = checksum_setup_ipv6(skb, recalculate_partial_csum);
> > +
> > +	return err;
> > +}
> > +
> >  static int handle_incoming_queue(struct net_device *dev,
> [...]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-07 11:02   ` Paul Durrant
@ 2013-11-07 11:49     ` Ian Campbell
  2013-11-15 11:19     ` Paul Durrant
  1 sibling, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2013-11-07 11:49 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

On Thu, 2013-11-07 at 11:02 +0000, Paul Durrant wrote:

> > (I found it confusing because you reset it in the recalculate_partial
> > rather than extending it which is what I expected)
> > 
> > maybe_pull_tail doesn't guarantee to pull up to the length of the given
> > parameter, e.g. if it is more then skb->len. Which I think means you
> > need some other explicit skb len checks around the place, don't you?
> > Otherwise you risk running past the end of the skb for a malformed
> > frame.
> > 
> 
> Yes, that's true. Probably best to have maybe_pull_tail() return a
> failure if it doesn't managed to pullup the requested amount then.

That would work.

> > Is it a requirement of the protocol that the list of IPPROTO_* we handle
> > must be followed by some other header? Do we really care or is it more
> > the upper stacks problem? Presumably it can get similarly malformed
> > packets off the wire?
> 
> I'd have to check but I'm pretty sure none of the protos we handle can
> be terminating so if done is not set then the packet is malformed.

OK. I'm not sure this is "our" problem though. we could just silently
throw this corrupt frame up at the stack without trying to redo the
checksum. Isn't that what would happen with real hardware?

> > > +		goto out;
> > > +	}
> > > +
> > > +	if (fragment) {
> > > +		if (net_ratelimit())
> > > +			pr_err("Packet is a fragment!\n");
> > 
> > Is that a bad thing?
> > 
> 
> Well, you can't do TCP or UDP checksum offload on a fragment, can you?

I guess I meant (as above), why do we particularly care? We could just
throw it at the stack, who will account it as a RX error and carry on

> > Speaking of which can checksum_setup_* be shared with netfront
> > somehow?
> > Or maybe even put in common code? (Perhaps as a future cleanup)
> > 
> 
> I think it really should be in common code, but I think that would be
> better handled as a separate patch to remove the duplication.

I'm ok with that. (I'm not netfront maintainer though...)

> Thanks for the comprehensive review!

No problem!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-07 11:02   ` Paul Durrant
  2013-11-07 11:49     ` Ian Campbell
@ 2013-11-15 11:19     ` Paul Durrant
  2013-11-19 10:28       ` Ian Campbell
  1 sibling, 1 reply; 13+ messages in thread
From: Paul Durrant @ 2013-11-15 11:19 UTC (permalink / raw)
  To: Paul Durrant, Ian Campbell; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

> -----Original Message-----
> From: xen-devel-bounces@lists.xen.org [mailto:xen-devel-
> bounces@lists.xen.org] On Behalf Of Paul Durrant
> Sent: 07 November 2013 11:02
> To: Ian Campbell
> Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> IPv6 offloads
> 
> > -----Original Message-----
> > From: Ian Campbell
> > Sent: 07 November 2013 10:49
> > To: Paul Durrant
> > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel
> > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> > IPv6 offloads
> >
> > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:
> > > This patch adds support for IPv6 checksum offload and GSO when those
> > > fetaures are available in the backend.
> > >
> > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > > Cc: David Vrabel <david.vrabel@citrix.com>
> > > ---
> > >  drivers/net/xen-netfront.c |  263
> > ++++++++++++++++++++++++++++++++++++++------
> > >  1 file changed, 231 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > > index dd1011e..27212d8 100644
> > > --- a/drivers/net/xen-netfront.c
> > > +++ b/drivers/net/xen-netfront.c
> > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb,
> > struct net_device *dev)
> > >  		tx->flags |= XEN_NETTXF_extra_info;
> > >
> > >  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> > > -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> > > +		gso->u.gso.type = (skb_shinfo(skb)->gso_type &
> > SKB_GSO_TCPV6) ?
> > > +			          XEN_NETIF_GSO_TYPE_TCPV6 :
> > > +			          XEN_NETIF_GSO_TYPE_TCPV4;
> > >  		gso->u.gso.pad = 0;
> > >  		gso->u.gso.features = 0;
> > >
> > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff
> > *skb,
> > >  		return -EINVAL;
> > >  	}
> > >
> > > -	/* Currently only TCPv4 S.O. is supported. */
> > > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > > +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> > > +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
> > >  		if (net_ratelimit())
> > >  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
> > >  		return -EINVAL;
> > >  	}
> > >
> > >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > > +	skb_shinfo(skb)->gso_type =
> > > +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> > > +		SKB_GSO_TCPV4 :
> > > +		SKB_GSO_TCPV6;
> > >
> > >  	/* Header must be checked, and gso_segs computed. */
> > >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct
> > netfront_info *np,
> > >  	return cons;
> > >  }
> > >
> > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
> > >  {
> > > -	struct iphdr *iph;
> > > -	int err = -EPROTO;
> > > -	int recalculate_partial_csum = 0;
> > > -
> > > -	/*
> > > -	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > > -	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > > -	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > > -	 * recalculate the partial checksum.
> > > -	 */
> > > -	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> > > -		struct netfront_info *np = netdev_priv(dev);
> > > -		np->rx_gso_checksum_fixup++;
> > > -		skb->ip_summed = CHECKSUM_PARTIAL;
> > > -		recalculate_partial_csum = 1;
> > > +	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
> > > +		/* If we need to pullup then pullup to the max, so we
> > > +		 * won't need to do it again.
> > > +		 */
> > > +		int target = min_t(int, skb->len, MAX_TCP_HEADER);
> > > +		__pskb_pull_tail(skb, target - skb_headlen(skb));
> >
> > Should the functions "len" argument not be involved somewhere in this?
> > What if it is bigger than MAX_TCP_HEADER for example?
> 
> Hmm, good point.
> 

Actually, now I look again, this is correct. The if statement checks len to see if a pullup is necessary, but if one is necessary then it always pulls up to the max. I still need to add a failure return to indicate the pullup did not achieve the desired length though.

  Paul

> >
> > >  	}
> > > +}
> > >
> > > -	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > > -	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > -		return 0;
> > > +static int checksum_setup_ip(struct sk_buff *skb, int
> > recalculate_partial_csum)
> > > +{
> >
> > > +	struct iphdr *iph = (void *)skb->data;
> > > +	unsigned int header_size;
> > > +	unsigned int off;
> > > +	int err = -EPROTO;
> > >
> > > -	if (skb->protocol != htons(ETH_P_IP))
> > > -		goto out;
> > > +	off = sizeof(struct iphdr);
> > >
> > > -	iph = (void *)skb->data;
> > > +	header_size = skb->network_header + off + MAX_IPOPTLEN;
> > > +	maybe_pull_tail(skb, header_size);
> >
> > You never reuse header_size other than setting it and immediately
> > passing it to maybe_pull_tail, it could be inlined into the function
> > call?
> >
> 
> Yes, it could but I thought this was tidier. It's also the same as the code in
> netback.
> 
> > (I found it confusing because you reset it in the recalculate_partial
> > rather than extending it which is what I expected)
> >
> > maybe_pull_tail doesn't guarantee to pull up to the length of the given
> > parameter, e.g. if it is more then skb->len. Which I think means you
> > need some other explicit skb len checks around the place, don't you?
> > Otherwise you risk running past the end of the skb for a malformed
> > frame.
> >
> 
> Yes, that's true. Probably best to have maybe_pull_tail() return a failure if it
> doesn't managed to pullup the requested amount then.
> 
> > > +
> > > +	off = iph->ihl * 4;
> > >
> > >  	switch (iph->protocol) {
> > >  	case IPPROTO_TCP:
> > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > +		if (!skb_partial_csum_set(skb, off,
> > >  					  offsetof(struct tcphdr, check)))
> > >  			goto out;
> > >
> > >  		if (recalculate_partial_csum) {
> > >  			struct tcphdr *tcph = tcp_hdr(skb);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct tcphdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > >  			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph-
> > >daddr,
> > > -							 skb->len - iph->ihl*4,
> > > +							 skb->len - off,
> > >  							 IPPROTO_TCP, 0);
> > >  		}
> > >  		break;
> > >  	case IPPROTO_UDP:
> > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > +		if (!skb_partial_csum_set(skb, off,
> > >  					  offsetof(struct udphdr, check)))
> > >  			goto out;
> > >
> > >  		if (recalculate_partial_csum) {
> > >  			struct udphdr *udph = udp_hdr(skb);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct udphdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > >  			udph->check = ~csum_tcpudp_magic(iph->saddr,
> > iph->daddr,
> > > -							 skb->len - iph->ihl*4,
> > > +							 skb->len - off,
> > >  							 IPPROTO_UDP, 0);
> > >  		}
> > >  		break;
> > >  	default:
> > >  		if (net_ratelimit())
> > > -			pr_err("Attempting to checksum a non-TCP/UDP
> > packet, dropping a protocol %d packet\n",
> > > +			pr_err("Attempting to checksum a non-TCP/UDP
> > packet, "
> > > +			       "dropping a protocol %d packet\n",
> >
> > I think Linux's coding style explicitly relaxes the 80-column limit for
> > string literals.
> >
> 
> Ok. Good to know.
> 
> > >  			       iph->protocol);
> > >  		goto out;
> > >  	}
> > > @@ -922,6 +937,158 @@ out:
> > >  	return err;
> > >  }
> > >
> > > +static int checksum_setup_ipv6(struct sk_buff *skb, int
> > recalculate_partial_csum)
> > > +{
> > > +	int err = -EPROTO;
> > > +	struct ipv6hdr *ipv6h = (void *)skb->data;
> > > +	u8 nexthdr;
> > > +	unsigned int header_size;
> > > +	unsigned int off;
> > > +	bool fragment;
> > > +	bool done;
> > > +
> > > +	done = false;
> > > +
> > > +	off = sizeof(struct ipv6hdr);
> > > +
> > > +	header_size = skb->network_header + off;
> > > +	maybe_pull_tail(skb, header_size);
> >
> > Same comment about skb lengths?
> >
> > > +
> > > +	nexthdr = ipv6h->nexthdr;
> > > +
> > > +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len))
> > &&
> > > +	       !done) {
> > > +		switch (nexthdr) {
> > > +		case IPPROTO_DSTOPTS:
> > > +		case IPPROTO_HOPOPTS:
> > > +		case IPPROTO_ROUTING: {
> > > +			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct ipv6_opt_hdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > > +			nexthdr = hp->nexthdr;
> > > +			off += ipv6_optlen(hp);
> > > +			break;
> > > +		}
> > > +		case IPPROTO_AH: {
> > > +			struct ip_auth_hdr *hp = (void *)(skb->data + off);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct ip_auth_hdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > > +			nexthdr = hp->nexthdr;
> > > +			off += (hp->hdrlen+2)<<2;
> >
> > I wonder if the core header would welcome a ipv6_ahlen cf optlen?
> >
> 
> Yes, it would be nicer wouldn't it.
> 
> > > +			break;
> > > +		}
> > > +		case IPPROTO_FRAGMENT:
> > > +			fragment = true;
> > > +			/* fall through */
> > > +		default:
> > > +			done = true;
> > > +			break;
> > > +		}
> > > +	}
> > > +
> > > +	if (!done) {
> > > +		if (net_ratelimit())
> > > +			pr_err("Failed to parse packet header\n");
> >
> > Is it a requirement of the protocol that the list of IPPROTO_* we handle
> > must be followed by some other header? Do we really care or is it more
> > the upper stacks problem? Presumably it can get similarly malformed
> > packets off the wire?
> 
> I'd have to check but I'm pretty sure none of the protos we handle can be
> terminating so if done is not set then the packet is malformed.
> 
> >
> > Also you can use net_err_ratelimited.
> 
> Ok.
> 
> >
> > > +		goto out;
> > > +	}
> > > +
> > > +	if (fragment) {
> > > +		if (net_ratelimit())
> > > +			pr_err("Packet is a fragment!\n");
> >
> > Is that a bad thing?
> >
> 
> Well, you can't do TCP or UDP checksum offload on a fragment, can you?
> 
> > > +		goto out;
> > > +	}
> > > +
> > > +	switch (nexthdr) {
> > > +	case IPPROTO_TCP:
> > > +		if (!skb_partial_csum_set(skb, off,
> > > +					  offsetof(struct tcphdr, check)))
> > > +			goto out;
> > > +
> > > +		if (recalculate_partial_csum) {
> > > +			struct tcphdr *tcph = tcp_hdr(skb);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct tcphdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > > +			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > > +						       &ipv6h->daddr,
> > > +						       skb->len - off,
> > > +						       IPPROTO_TCP, 0);
> > > +		}
> > > +		break;
> > > +	case IPPROTO_UDP:
> > > +		if (!skb_partial_csum_set(skb, off,
> > > +					  offsetof(struct udphdr, check)))
> > > +			goto out;
> > > +
> > > +		if (recalculate_partial_csum) {
> > > +			struct udphdr *udph = udp_hdr(skb);
> > > +
> > > +			header_size = skb->network_header +
> > > +				off +
> > > +				sizeof(struct udphdr);
> > > +			maybe_pull_tail(skb, header_size);
> > > +
> > > +			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > > +						       &ipv6h->daddr,
> > > +						       skb->len - off,
> > > +						       IPPROTO_UDP, 0);
> > > +		}
> > > +		break;
> > > +	default:
> > > +		if (net_ratelimit())
> > > +			pr_err("Attempting to checksum a non-TCP/UDP
> > packet, "
> > > +			       "dropping a protocol %d packet\n",
> > > +			       nexthdr);
> > > +		goto out;
> > > +	}
> > > +
> > > +	err = 0;
> > > +
> > > +out:
> > > +	return err;
> > > +}
> > > +
> > > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > > +{
> > > +	int err = -EPROTO;
> > > +	int recalculate_partial_csum = 0;
> > > +
> > > +	/*
> > > +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > > +	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > > +	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > > +	 * recalculate the partial checksum.
> > > +	 */
> > > +	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> >
> > Is it worth making this ipv4 only. Assuming that anything which adds v6
> > support has already fixed this bug? That would save time in
> > checksum_setup_ipv6.
> >
> 
> That's true.
> 
> > Speaking of which can checksum_setup_* be shared with netfront
> > somehow?
> > Or maybe even put in common code? (Perhaps as a future cleanup)
> >
> 
> I think it really should be in common code, but I think that would be better
> handled as a separate patch to remove the duplication.
> 
> Thanks for the comprehensive review!
> 
>   Paul
> 
> > > +		struct netfront_info *np = netdev_priv(dev);
> > > +		np->rx_gso_checksum_fixup++;
> > > +		skb->ip_summed = CHECKSUM_PARTIAL;
> > > +		recalculate_partial_csum = 1;
> > > +	}
> > > +
> > > +	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > > +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > +		return 0;
> > > +
> > > +	if (skb->protocol == htons(ETH_P_IP))
> > > +		err = checksum_setup_ip(skb, recalculate_partial_csum);
> > > +	else if (skb->protocol == htons(ETH_P_IPV6))
> > > +		err = checksum_setup_ipv6(skb, recalculate_partial_csum);
> > > +
> > > +	return err;
> > > +}
> > > +
> > >  static int handle_incoming_queue(struct net_device *dev,
> > [...]
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-15 11:19     ` Paul Durrant
@ 2013-11-19 10:28       ` Ian Campbell
  2013-11-19 11:24         ` Paul Durrant
  0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-11-19 10:28 UTC (permalink / raw)
  To: Paul Durrant; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

On Fri, 2013-11-15 at 11:19 +0000, Paul Durrant wrote:
> > -----Original Message-----
> > From: xen-devel-bounces@lists.xen.org [mailto:xen-devel-
> > bounces@lists.xen.org] On Behalf Of Paul Durrant
> > Sent: 07 November 2013 11:02
> > To: Ian Campbell
> > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org
> > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> > IPv6 offloads
> > 
> > > -----Original Message-----
> > > From: Ian Campbell
> > > Sent: 07 November 2013 10:49
> > > To: Paul Durrant
> > > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel
> > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> > > IPv6 offloads
> > >
> > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:
> > > > This patch adds support for IPv6 checksum offload and GSO when those
> > > > fetaures are available in the backend.
> > > >
> > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > > > Cc: David Vrabel <david.vrabel@citrix.com>
> > > > ---
> > > >  drivers/net/xen-netfront.c |  263
> > > ++++++++++++++++++++++++++++++++++++++------
> > > >  1 file changed, 231 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > > > index dd1011e..27212d8 100644
> > > > --- a/drivers/net/xen-netfront.c
> > > > +++ b/drivers/net/xen-netfront.c
> > > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb,
> > > struct net_device *dev)
> > > >  		tx->flags |= XEN_NETTXF_extra_info;
> > > >
> > > >  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> > > > -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> > > > +		gso->u.gso.type = (skb_shinfo(skb)->gso_type &
> > > SKB_GSO_TCPV6) ?
> > > > +			          XEN_NETIF_GSO_TYPE_TCPV6 :
> > > > +			          XEN_NETIF_GSO_TYPE_TCPV4;
> > > >  		gso->u.gso.pad = 0;
> > > >  		gso->u.gso.features = 0;
> > > >
> > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff
> > > *skb,
> > > >  		return -EINVAL;
> > > >  	}
> > > >
> > > > -	/* Currently only TCPv4 S.O. is supported. */
> > > > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > > > +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> > > > +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
> > > >  		if (net_ratelimit())
> > > >  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
> > > >  		return -EINVAL;
> > > >  	}
> > > >
> > > >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > > > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > > > +	skb_shinfo(skb)->gso_type =
> > > > +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> > > > +		SKB_GSO_TCPV4 :
> > > > +		SKB_GSO_TCPV6;
> > > >
> > > >  	/* Header must be checked, and gso_segs computed. */
> > > >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct
> > > netfront_info *np,
> > > >  	return cons;
> > > >  }
> > > >
> > > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
> > > >  {
> > > > -	struct iphdr *iph;
> > > > -	int err = -EPROTO;
> > > > -	int recalculate_partial_csum = 0;
> > > > -
> > > > -	/*
> > > > -	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > > > -	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > > > -	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > > > -	 * recalculate the partial checksum.
> > > > -	 */
> > > > -	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> > > > -		struct netfront_info *np = netdev_priv(dev);
> > > > -		np->rx_gso_checksum_fixup++;
> > > > -		skb->ip_summed = CHECKSUM_PARTIAL;
> > > > -		recalculate_partial_csum = 1;
> > > > +	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
> > > > +		/* If we need to pullup then pullup to the max, so we
> > > > +		 * won't need to do it again.
> > > > +		 */
> > > > +		int target = min_t(int, skb->len, MAX_TCP_HEADER);
> > > > +		__pskb_pull_tail(skb, target - skb_headlen(skb));
> > >
> > > Should the functions "len" argument not be involved somewhere in this?
> > > What if it is bigger than MAX_TCP_HEADER for example?
> > 
> > Hmm, good point.
> > 
> 
> Actually, now I look again, this is correct. The if statement checks
> len to see if a pullup is necessary, but if one is necessary then it
> always pulls up to the max.

So it is guaranteed that len <= MAX_TCP_HEADER in all circumstances?
That might surprise some callers, since the name doesn't really indicate
this has anything to do with TCP. How about passing max in as an
argument, where all callers would currently pass MAX_TCP_HEADER?

>  I still need to add a failure return to indicate the pullup did not
> achieve the desired length though.
> 
>   Paul
> 
> > >
> > > >  	}
> > > > +}
> > > >
> > > > -	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > > > -	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > > -		return 0;
> > > > +static int checksum_setup_ip(struct sk_buff *skb, int
> > > recalculate_partial_csum)
> > > > +{
> > >
> > > > +	struct iphdr *iph = (void *)skb->data;
> > > > +	unsigned int header_size;
> > > > +	unsigned int off;
> > > > +	int err = -EPROTO;
> > > >
> > > > -	if (skb->protocol != htons(ETH_P_IP))
> > > > -		goto out;
> > > > +	off = sizeof(struct iphdr);
> > > >
> > > > -	iph = (void *)skb->data;
> > > > +	header_size = skb->network_header + off + MAX_IPOPTLEN;
> > > > +	maybe_pull_tail(skb, header_size);
> > >
> > > You never reuse header_size other than setting it and immediately
> > > passing it to maybe_pull_tail, it could be inlined into the function
> > > call?
> > >
> > 
> > Yes, it could but I thought this was tidier. It's also the same as the code in
> > netback.
> > 
> > > (I found it confusing because you reset it in the recalculate_partial
> > > rather than extending it which is what I expected)
> > >
> > > maybe_pull_tail doesn't guarantee to pull up to the length of the given
> > > parameter, e.g. if it is more then skb->len. Which I think means you
> > > need some other explicit skb len checks around the place, don't you?
> > > Otherwise you risk running past the end of the skb for a malformed
> > > frame.
> > >
> > 
> > Yes, that's true. Probably best to have maybe_pull_tail() return a failure if it
> > doesn't managed to pullup the requested amount then.
> > 
> > > > +
> > > > +	off = iph->ihl * 4;
> > > >
> > > >  	switch (iph->protocol) {
> > > >  	case IPPROTO_TCP:
> > > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > > +		if (!skb_partial_csum_set(skb, off,
> > > >  					  offsetof(struct tcphdr, check)))
> > > >  			goto out;
> > > >
> > > >  		if (recalculate_partial_csum) {
> > > >  			struct tcphdr *tcph = tcp_hdr(skb);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct tcphdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > >  			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph-
> > > >daddr,
> > > > -							 skb->len - iph->ihl*4,
> > > > +							 skb->len - off,
> > > >  							 IPPROTO_TCP, 0);
> > > >  		}
> > > >  		break;
> > > >  	case IPPROTO_UDP:
> > > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > > +		if (!skb_partial_csum_set(skb, off,
> > > >  					  offsetof(struct udphdr, check)))
> > > >  			goto out;
> > > >
> > > >  		if (recalculate_partial_csum) {
> > > >  			struct udphdr *udph = udp_hdr(skb);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct udphdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > >  			udph->check = ~csum_tcpudp_magic(iph->saddr,
> > > iph->daddr,
> > > > -							 skb->len - iph->ihl*4,
> > > > +							 skb->len - off,
> > > >  							 IPPROTO_UDP, 0);
> > > >  		}
> > > >  		break;
> > > >  	default:
> > > >  		if (net_ratelimit())
> > > > -			pr_err("Attempting to checksum a non-TCP/UDP
> > > packet, dropping a protocol %d packet\n",
> > > > +			pr_err("Attempting to checksum a non-TCP/UDP
> > > packet, "
> > > > +			       "dropping a protocol %d packet\n",
> > >
> > > I think Linux's coding style explicitly relaxes the 80-column limit for
> > > string literals.
> > >
> > 
> > Ok. Good to know.
> > 
> > > >  			       iph->protocol);
> > > >  		goto out;
> > > >  	}
> > > > @@ -922,6 +937,158 @@ out:
> > > >  	return err;
> > > >  }
> > > >
> > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int
> > > recalculate_partial_csum)
> > > > +{
> > > > +	int err = -EPROTO;
> > > > +	struct ipv6hdr *ipv6h = (void *)skb->data;
> > > > +	u8 nexthdr;
> > > > +	unsigned int header_size;
> > > > +	unsigned int off;
> > > > +	bool fragment;
> > > > +	bool done;
> > > > +
> > > > +	done = false;
> > > > +
> > > > +	off = sizeof(struct ipv6hdr);
> > > > +
> > > > +	header_size = skb->network_header + off;
> > > > +	maybe_pull_tail(skb, header_size);
> > >
> > > Same comment about skb lengths?
> > >
> > > > +
> > > > +	nexthdr = ipv6h->nexthdr;
> > > > +
> > > > +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len))
> > > &&
> > > > +	       !done) {
> > > > +		switch (nexthdr) {
> > > > +		case IPPROTO_DSTOPTS:
> > > > +		case IPPROTO_HOPOPTS:
> > > > +		case IPPROTO_ROUTING: {
> > > > +			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct ipv6_opt_hdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > > +			nexthdr = hp->nexthdr;
> > > > +			off += ipv6_optlen(hp);
> > > > +			break;
> > > > +		}
> > > > +		case IPPROTO_AH: {
> > > > +			struct ip_auth_hdr *hp = (void *)(skb->data + off);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct ip_auth_hdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > > +			nexthdr = hp->nexthdr;
> > > > +			off += (hp->hdrlen+2)<<2;
> > >
> > > I wonder if the core header would welcome a ipv6_ahlen cf optlen?
> > >
> > 
> > Yes, it would be nicer wouldn't it.
> > 
> > > > +			break;
> > > > +		}
> > > > +		case IPPROTO_FRAGMENT:
> > > > +			fragment = true;
> > > > +			/* fall through */
> > > > +		default:
> > > > +			done = true;
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	if (!done) {
> > > > +		if (net_ratelimit())
> > > > +			pr_err("Failed to parse packet header\n");
> > >
> > > Is it a requirement of the protocol that the list of IPPROTO_* we handle
> > > must be followed by some other header? Do we really care or is it more
> > > the upper stacks problem? Presumably it can get similarly malformed
> > > packets off the wire?
> > 
> > I'd have to check but I'm pretty sure none of the protos we handle can be
> > terminating so if done is not set then the packet is malformed.
> > 
> > >
> > > Also you can use net_err_ratelimited.
> > 
> > Ok.
> > 
> > >
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	if (fragment) {
> > > > +		if (net_ratelimit())
> > > > +			pr_err("Packet is a fragment!\n");
> > >
> > > Is that a bad thing?
> > >
> > 
> > Well, you can't do TCP or UDP checksum offload on a fragment, can you?
> > 
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	switch (nexthdr) {
> > > > +	case IPPROTO_TCP:
> > > > +		if (!skb_partial_csum_set(skb, off,
> > > > +					  offsetof(struct tcphdr, check)))
> > > > +			goto out;
> > > > +
> > > > +		if (recalculate_partial_csum) {
> > > > +			struct tcphdr *tcph = tcp_hdr(skb);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct tcphdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > > +			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > > > +						       &ipv6h->daddr,
> > > > +						       skb->len - off,
> > > > +						       IPPROTO_TCP, 0);
> > > > +		}
> > > > +		break;
> > > > +	case IPPROTO_UDP:
> > > > +		if (!skb_partial_csum_set(skb, off,
> > > > +					  offsetof(struct udphdr, check)))
> > > > +			goto out;
> > > > +
> > > > +		if (recalculate_partial_csum) {
> > > > +			struct udphdr *udph = udp_hdr(skb);
> > > > +
> > > > +			header_size = skb->network_header +
> > > > +				off +
> > > > +				sizeof(struct udphdr);
> > > > +			maybe_pull_tail(skb, header_size);
> > > > +
> > > > +			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
> > > > +						       &ipv6h->daddr,
> > > > +						       skb->len - off,
> > > > +						       IPPROTO_UDP, 0);
> > > > +		}
> > > > +		break;
> > > > +	default:
> > > > +		if (net_ratelimit())
> > > > +			pr_err("Attempting to checksum a non-TCP/UDP
> > > packet, "
> > > > +			       "dropping a protocol %d packet\n",
> > > > +			       nexthdr);
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	err = 0;
> > > > +
> > > > +out:
> > > > +	return err;
> > > > +}
> > > > +
> > > > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
> > > > +{
> > > > +	int err = -EPROTO;
> > > > +	int recalculate_partial_csum = 0;
> > > > +
> > > > +	/*
> > > > +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
> > > > +	 * peers can fail to set NETRXF_csum_blank when sending a GSO
> > > > +	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
> > > > +	 * recalculate the partial checksum.
> > > > +	 */
> > > > +	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
> > >
> > > Is it worth making this ipv4 only. Assuming that anything which adds v6
> > > support has already fixed this bug? That would save time in
> > > checksum_setup_ipv6.
> > >
> > 
> > That's true.
> > 
> > > Speaking of which can checksum_setup_* be shared with netfront
> > > somehow?
> > > Or maybe even put in common code? (Perhaps as a future cleanup)
> > >
> > 
> > I think it really should be in common code, but I think that would be better
> > handled as a separate patch to remove the duplication.
> > 
> > Thanks for the comprehensive review!
> > 
> >   Paul
> > 
> > > > +		struct netfront_info *np = netdev_priv(dev);
> > > > +		np->rx_gso_checksum_fixup++;
> > > > +		skb->ip_summed = CHECKSUM_PARTIAL;
> > > > +		recalculate_partial_csum = 1;
> > > > +	}
> > > > +
> > > > +	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
> > > > +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > > +		return 0;
> > > > +
> > > > +	if (skb->protocol == htons(ETH_P_IP))
> > > > +		err = checksum_setup_ip(skb, recalculate_partial_csum);
> > > > +	else if (skb->protocol == htons(ETH_P_IPV6))
> > > > +		err = checksum_setup_ipv6(skb, recalculate_partial_csum);
> > > > +
> > > > +	return err;
> > > > +}
> > > > +
> > > >  static int handle_incoming_queue(struct net_device *dev,
> > > [...]
> > 
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
  2013-11-19 10:28       ` Ian Campbell
@ 2013-11-19 11:24         ` Paul Durrant
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Durrant @ 2013-11-19 11:24 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Boris Ostrovsky, David Vrabel, xen-devel

> -----Original Message-----
> From: Ian Campbell
> Sent: 19 November 2013 10:29
> To: Paul Durrant
> Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org
> Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> IPv6 offloads
> 
> On Fri, 2013-11-15 at 11:19 +0000, Paul Durrant wrote:
> > > -----Original Message-----
> > > From: xen-devel-bounces@lists.xen.org [mailto:xen-devel-
> > > bounces@lists.xen.org] On Behalf Of Paul Durrant
> > > Sent: 07 November 2013 11:02
> > > To: Ian Campbell
> > > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org
> > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for
> > > IPv6 offloads
> > >
> > > > -----Original Message-----
> > > > From: Ian Campbell
> > > > Sent: 07 November 2013 10:49
> > > > To: Paul Durrant
> > > > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel
> > > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support
> for
> > > > IPv6 offloads
> > > >
> > > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:
> > > > > This patch adds support for IPv6 checksum offload and GSO when
> those
> > > > > fetaures are available in the backend.
> > > > >
> > > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > > > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > > > > Cc: David Vrabel <david.vrabel@citrix.com>
> > > > > ---
> > > > >  drivers/net/xen-netfront.c |  263
> > > > ++++++++++++++++++++++++++++++++++++++------
> > > > >  1 file changed, 231 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > > > > index dd1011e..27212d8 100644
> > > > > --- a/drivers/net/xen-netfront.c
> > > > > +++ b/drivers/net/xen-netfront.c
> > > > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff
> *skb,
> > > > struct net_device *dev)
> > > > >  		tx->flags |= XEN_NETTXF_extra_info;
> > > > >
> > > > >  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> > > > > -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> > > > > +		gso->u.gso.type = (skb_shinfo(skb)->gso_type &
> > > > SKB_GSO_TCPV6) ?
> > > > > +			          XEN_NETIF_GSO_TYPE_TCPV6 :
> > > > > +			          XEN_NETIF_GSO_TYPE_TCPV4;
> > > > >  		gso->u.gso.pad = 0;
> > > > >  		gso->u.gso.features = 0;
> > > > >
> > > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct
> sk_buff
> > > > *skb,
> > > > >  		return -EINVAL;
> > > > >  	}
> > > > >
> > > > > -	/* Currently only TCPv4 S.O. is supported. */
> > > > > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > > > > +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> > > > > +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
> > > > >  		if (net_ratelimit())
> > > > >  			pr_warn("Bad GSO type %d\n", gso-
> >u.gso.type);
> > > > >  		return -EINVAL;
> > > > >  	}
> > > > >
> > > > >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > > > > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > > > > +	skb_shinfo(skb)->gso_type =
> > > > > +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4)
> ?
> > > > > +		SKB_GSO_TCPV4 :
> > > > > +		SKB_GSO_TCPV6;
> > > > >
> > > > >  	/* Header must be checked, and gso_segs computed. */
> > > > >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > > > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct
> > > > netfront_info *np,
> > > > >  	return cons;
> > > > >  }
> > > > >
> > > > > -static int checksum_setup(struct net_device *dev, struct sk_buff
> *skb)
> > > > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int
> len)
> > > > >  {
> > > > > -	struct iphdr *iph;
> > > > > -	int err = -EPROTO;
> > > > > -	int recalculate_partial_csum = 0;
> > > > > -
> > > > > -	/*
> > > > > -	 * A GSO SKB must be CHECKSUM_PARTIAL. However some
> buggy
> > > > > -	 * peers can fail to set NETRXF_csum_blank when sending a
> GSO
> > > > > -	 * frame. In this case force the SKB to CHECKSUM_PARTIAL
> and
> > > > > -	 * recalculate the partial checksum.
> > > > > -	 */
> > > > > -	if (skb->ip_summed != CHECKSUM_PARTIAL &&
> skb_is_gso(skb)) {
> > > > > -		struct netfront_info *np = netdev_priv(dev);
> > > > > -		np->rx_gso_checksum_fixup++;
> > > > > -		skb->ip_summed = CHECKSUM_PARTIAL;
> > > > > -		recalculate_partial_csum = 1;
> > > > > +	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
> > > > > +		/* If we need to pullup then pullup to the max, so we
> > > > > +		 * won't need to do it again.
> > > > > +		 */
> > > > > +		int target = min_t(int, skb->len, MAX_TCP_HEADER);
> > > > > +		__pskb_pull_tail(skb, target - skb_headlen(skb));
> > > >
> > > > Should the functions "len" argument not be involved somewhere in
> this?
> > > > What if it is bigger than MAX_TCP_HEADER for example?
> > >
> > > Hmm, good point.
> > >
> >
> > Actually, now I look again, this is correct. The if statement checks
> > len to see if a pullup is necessary, but if one is necessary then it
> > always pulls up to the max.
> 
> So it is guaranteed that len <= MAX_TCP_HEADER in all circumstances?
> That might surprise some callers, since the name doesn't really indicate
> this has anything to do with TCP. How about passing max in as an
> argument, where all callers would currently pass MAX_TCP_HEADER?
> 

Ok, that sounds reasonable. It's unclear to me whether there is any upper bound on the size of an IPv6 header, but google hits a recent thread in the IETF mail archives (starting at http://www.ietf.org/mail-archive/web/ipv6/current/msg18199.html ) suggesting that 256 might be a good number to choose but I agree it's not directly related to 256, so maybe an argument and a local #define IPV6_MAX_HEADER of 256 would be better.

   Paul

> >  I still need to add a failure return to indicate the pullup did not
> > achieve the desired length though.
> >
> >   Paul
> >
> > > >
> > > > >  	}
> > > > > +}
> > > > >
> > > > > -	/* A non-CHECKSUM_PARTIAL SKB does not require setup.
> */
> > > > > -	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > > > -		return 0;
> > > > > +static int checksum_setup_ip(struct sk_buff *skb, int
> > > > recalculate_partial_csum)
> > > > > +{
> > > >
> > > > > +	struct iphdr *iph = (void *)skb->data;
> > > > > +	unsigned int header_size;
> > > > > +	unsigned int off;
> > > > > +	int err = -EPROTO;
> > > > >
> > > > > -	if (skb->protocol != htons(ETH_P_IP))
> > > > > -		goto out;
> > > > > +	off = sizeof(struct iphdr);
> > > > >
> > > > > -	iph = (void *)skb->data;
> > > > > +	header_size = skb->network_header + off +
> MAX_IPOPTLEN;
> > > > > +	maybe_pull_tail(skb, header_size);
> > > >
> > > > You never reuse header_size other than setting it and immediately
> > > > passing it to maybe_pull_tail, it could be inlined into the function
> > > > call?
> > > >
> > >
> > > Yes, it could but I thought this was tidier. It's also the same as the code in
> > > netback.
> > >
> > > > (I found it confusing because you reset it in the recalculate_partial
> > > > rather than extending it which is what I expected)
> > > >
> > > > maybe_pull_tail doesn't guarantee to pull up to the length of the given
> > > > parameter, e.g. if it is more then skb->len. Which I think means you
> > > > need some other explicit skb len checks around the place, don't you?
> > > > Otherwise you risk running past the end of the skb for a malformed
> > > > frame.
> > > >
> > >
> > > Yes, that's true. Probably best to have maybe_pull_tail() return a failure if
> it
> > > doesn't managed to pullup the requested amount then.
> > >
> > > > > +
> > > > > +	off = iph->ihl * 4;
> > > > >
> > > > >  	switch (iph->protocol) {
> > > > >  	case IPPROTO_TCP:
> > > > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > > > +		if (!skb_partial_csum_set(skb, off,
> > > > >  					  offsetof(struct tcphdr,
> check)))
> > > > >  			goto out;
> > > > >
> > > > >  		if (recalculate_partial_csum) {
> > > > >  			struct tcphdr *tcph = tcp_hdr(skb);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct tcphdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > >  			tcph->check = ~csum_tcpudp_magic(iph-
> >saddr, iph-
> > > > >daddr,
> > > > > -							 skb->len -
> iph->ihl*4,
> > > > > +							 skb->len -
> off,
> > > > >
> IPPROTO_TCP, 0);
> > > > >  		}
> > > > >  		break;
> > > > >  	case IPPROTO_UDP:
> > > > > -		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
> > > > > +		if (!skb_partial_csum_set(skb, off,
> > > > >  					  offsetof(struct udphdr,
> check)))
> > > > >  			goto out;
> > > > >
> > > > >  		if (recalculate_partial_csum) {
> > > > >  			struct udphdr *udph = udp_hdr(skb);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct udphdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > >  			udph->check = ~csum_tcpudp_magic(iph-
> >saddr,
> > > > iph->daddr,
> > > > > -							 skb->len -
> iph->ihl*4,
> > > > > +							 skb->len -
> off,
> > > > >
> IPPROTO_UDP, 0);
> > > > >  		}
> > > > >  		break;
> > > > >  	default:
> > > > >  		if (net_ratelimit())
> > > > > -			pr_err("Attempting to checksum a non-
> TCP/UDP
> > > > packet, dropping a protocol %d packet\n",
> > > > > +			pr_err("Attempting to checksum a non-
> TCP/UDP
> > > > packet, "
> > > > > +			       "dropping a protocol %d packet\n",
> > > >
> > > > I think Linux's coding style explicitly relaxes the 80-column limit for
> > > > string literals.
> > > >
> > >
> > > Ok. Good to know.
> > >
> > > > >  			       iph->protocol);
> > > > >  		goto out;
> > > > >  	}
> > > > > @@ -922,6 +937,158 @@ out:
> > > > >  	return err;
> > > > >  }
> > > > >
> > > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int
> > > > recalculate_partial_csum)
> > > > > +{
> > > > > +	int err = -EPROTO;
> > > > > +	struct ipv6hdr *ipv6h = (void *)skb->data;
> > > > > +	u8 nexthdr;
> > > > > +	unsigned int header_size;
> > > > > +	unsigned int off;
> > > > > +	bool fragment;
> > > > > +	bool done;
> > > > > +
> > > > > +	done = false;
> > > > > +
> > > > > +	off = sizeof(struct ipv6hdr);
> > > > > +
> > > > > +	header_size = skb->network_header + off;
> > > > > +	maybe_pull_tail(skb, header_size);
> > > >
> > > > Same comment about skb lengths?
> > > >
> > > > > +
> > > > > +	nexthdr = ipv6h->nexthdr;
> > > > > +
> > > > > +	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h-
> >payload_len))
> > > > &&
> > > > > +	       !done) {
> > > > > +		switch (nexthdr) {
> > > > > +		case IPPROTO_DSTOPTS:
> > > > > +		case IPPROTO_HOPOPTS:
> > > > > +		case IPPROTO_ROUTING: {
> > > > > +			struct ipv6_opt_hdr *hp = (void *)(skb->data
> + off);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct ipv6_opt_hdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > > +			nexthdr = hp->nexthdr;
> > > > > +			off += ipv6_optlen(hp);
> > > > > +			break;
> > > > > +		}
> > > > > +		case IPPROTO_AH: {
> > > > > +			struct ip_auth_hdr *hp = (void *)(skb->data
> + off);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct ip_auth_hdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > > +			nexthdr = hp->nexthdr;
> > > > > +			off += (hp->hdrlen+2)<<2;
> > > >
> > > > I wonder if the core header would welcome a ipv6_ahlen cf optlen?
> > > >
> > >
> > > Yes, it would be nicer wouldn't it.
> > >
> > > > > +			break;
> > > > > +		}
> > > > > +		case IPPROTO_FRAGMENT:
> > > > > +			fragment = true;
> > > > > +			/* fall through */
> > > > > +		default:
> > > > > +			done = true;
> > > > > +			break;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	if (!done) {
> > > > > +		if (net_ratelimit())
> > > > > +			pr_err("Failed to parse packet header\n");
> > > >
> > > > Is it a requirement of the protocol that the list of IPPROTO_* we handle
> > > > must be followed by some other header? Do we really care or is it more
> > > > the upper stacks problem? Presumably it can get similarly malformed
> > > > packets off the wire?
> > >
> > > I'd have to check but I'm pretty sure none of the protos we handle can be
> > > terminating so if done is not set then the packet is malformed.
> > >
> > > >
> > > > Also you can use net_err_ratelimited.
> > >
> > > Ok.
> > >
> > > >
> > > > > +		goto out;
> > > > > +	}
> > > > > +
> > > > > +	if (fragment) {
> > > > > +		if (net_ratelimit())
> > > > > +			pr_err("Packet is a fragment!\n");
> > > >
> > > > Is that a bad thing?
> > > >
> > >
> > > Well, you can't do TCP or UDP checksum offload on a fragment, can you?
> > >
> > > > > +		goto out;
> > > > > +	}
> > > > > +
> > > > > +	switch (nexthdr) {
> > > > > +	case IPPROTO_TCP:
> > > > > +		if (!skb_partial_csum_set(skb, off,
> > > > > +					  offsetof(struct tcphdr,
> check)))
> > > > > +			goto out;
> > > > > +
> > > > > +		if (recalculate_partial_csum) {
> > > > > +			struct tcphdr *tcph = tcp_hdr(skb);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct tcphdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > > +			tcph->check = ~csum_ipv6_magic(&ipv6h-
> >saddr,
> > > > > +						       &ipv6h->daddr,
> > > > > +						       skb->len - off,
> > > > > +						       IPPROTO_TCP, 0);
> > > > > +		}
> > > > > +		break;
> > > > > +	case IPPROTO_UDP:
> > > > > +		if (!skb_partial_csum_set(skb, off,
> > > > > +					  offsetof(struct udphdr,
> check)))
> > > > > +			goto out;
> > > > > +
> > > > > +		if (recalculate_partial_csum) {
> > > > > +			struct udphdr *udph = udp_hdr(skb);
> > > > > +
> > > > > +			header_size = skb->network_header +
> > > > > +				off +
> > > > > +				sizeof(struct udphdr);
> > > > > +			maybe_pull_tail(skb, header_size);
> > > > > +
> > > > > +			udph->check = ~csum_ipv6_magic(&ipv6h-
> >saddr,
> > > > > +						       &ipv6h->daddr,
> > > > > +						       skb->len - off,
> > > > > +						       IPPROTO_UDP, 0);
> > > > > +		}
> > > > > +		break;
> > > > > +	default:
> > > > > +		if (net_ratelimit())
> > > > > +			pr_err("Attempting to checksum a non-
> TCP/UDP
> > > > packet, "
> > > > > +			       "dropping a protocol %d packet\n",
> > > > > +			       nexthdr);
> > > > > +		goto out;
> > > > > +	}
> > > > > +
> > > > > +	err = 0;
> > > > > +
> > > > > +out:
> > > > > +	return err;
> > > > > +}
> > > > > +
> > > > > +static int checksum_setup(struct net_device *dev, struct sk_buff
> *skb)
> > > > > +{
> > > > > +	int err = -EPROTO;
> > > > > +	int recalculate_partial_csum = 0;
> > > > > +
> > > > > +	/*
> > > > > +	 * A GSO SKB must be CHECKSUM_PARTIAL. However some
> buggy
> > > > > +	 * peers can fail to set NETRXF_csum_blank when sending a
> GSO
> > > > > +	 * frame. In this case force the SKB to CHECKSUM_PARTIAL
> and
> > > > > +	 * recalculate the partial checksum.
> > > > > +	 */
> > > > > +	if (skb->ip_summed != CHECKSUM_PARTIAL &&
> skb_is_gso(skb)) {
> > > >
> > > > Is it worth making this ipv4 only. Assuming that anything which adds v6
> > > > support has already fixed this bug? That would save time in
> > > > checksum_setup_ipv6.
> > > >
> > >
> > > That's true.
> > >
> > > > Speaking of which can checksum_setup_* be shared with netfront
> > > > somehow?
> > > > Or maybe even put in common code? (Perhaps as a future cleanup)
> > > >
> > >
> > > I think it really should be in common code, but I think that would be
> better
> > > handled as a separate patch to remove the duplication.
> > >
> > > Thanks for the comprehensive review!
> > >
> > >   Paul
> > >
> > > > > +		struct netfront_info *np = netdev_priv(dev);
> > > > > +		np->rx_gso_checksum_fixup++;
> > > > > +		skb->ip_summed = CHECKSUM_PARTIAL;
> > > > > +		recalculate_partial_csum = 1;
> > > > > +	}
> > > > > +
> > > > > +	/* A non-CHECKSUM_PARTIAL SKB does not require setup.
> */
> > > > > +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> > > > > +		return 0;
> > > > > +
> > > > > +	if (skb->protocol == htons(ETH_P_IP))
> > > > > +		err = checksum_setup_ip(skb,
> recalculate_partial_csum);
> > > > > +	else if (skb->protocol == htons(ETH_P_IPV6))
> > > > > +		err = checksum_setup_ipv6(skb,
> recalculate_partial_csum);
> > > > > +
> > > > > +	return err;
> > > > > +}
> > > > > +
> > > > >  static int handle_incoming_queue(struct net_device *dev,
> > > > [...]
> > >
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xen.org
> > > http://lists.xen.org/xen-devel
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: add support for IPv6 offloads
  2014-01-15 16:03     ` Jan Beulich
@ 2014-01-15 17:08       ` Paul Durrant
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Durrant @ 2014-01-15 17:08 UTC (permalink / raw)
  To: Jan Beulich, Andrew Cooper
  Cc: netdev, Boris Ostrovsky, David Vrabel, xen-devel

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 15 January 2014 16:04
> To: Andrew Cooper; Paul Durrant
> Cc: David Vrabel; xen-devel@lists.xen.org; Boris Ostrovsky;
> netdev@vger.kernel.org
> Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: add support for
> IPv6 offloads
> 
> >>> On 15.01.14 at 16:54, Paul Durrant <Paul.Durrant@citrix.com> wrote:
> >> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> >> On 15/01/14 15:18, Paul Durrant wrote:
> >> > +	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
> "%d", 1);
> >>
> >> "%d", 1 results in a constant string.  xenbus_write() would avoid a
> >> transitory memory allocation.
> >
> > This code is consistent with all the other xenbus_printf()s in the
> > neighbourhood and does it really matter?
> 
> I think we should always strive to have the simplest possible code
> that fulfills the purpose. And hence we shouldn't be setting further
> bad precedents. (In fact I have a patch queued to replace all the
> unnecessary xenbus_printf()s with xenbus_write()s on
> linux-2.6.18-xen.hg, and may look into porting this to the
> respective upstream components.)
> 

Ok. Personally I'd go for code consistency with this patch and then a full replacement... but I'll re-work it.

  Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: add support for IPv6 offloads
  2014-01-15 15:54   ` [Xen-devel] " Paul Durrant
  2014-01-15 16:03     ` Jan Beulich
@ 2014-01-15 16:03     ` Jan Beulich
  1 sibling, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2014-01-15 16:03 UTC (permalink / raw)
  To: Andrew Cooper, Paul Durrant
  Cc: netdev, Boris Ostrovsky, David Vrabel, xen-devel

>>> On 15.01.14 at 16:54, Paul Durrant <Paul.Durrant@citrix.com> wrote:
>> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
>> On 15/01/14 15:18, Paul Durrant wrote:
>> > +	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
>> 
>> "%d", 1 results in a constant string.  xenbus_write() would avoid a
>> transitory memory allocation.
> 
> This code is consistent with all the other xenbus_printf()s in the 
> neighbourhood and does it really matter?

I think we should always strive to have the simplest possible code
that fulfills the purpose. And hence we shouldn't be setting further
bad precedents. (In fact I have a patch queued to replace all the
unnecessary xenbus_printf()s with xenbus_write()s on
linux-2.6.18-xen.hg, and may look into porting this to the
respective upstream components.)

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: add support for IPv6 offloads
  2014-01-15 15:25 ` [Xen-devel] " Andrew Cooper
@ 2014-01-15 15:54   ` Paul Durrant
  2014-01-15 15:54   ` [Xen-devel] " Paul Durrant
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Durrant @ 2014-01-15 15:54 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: netdev, Boris Ostrovsky, David Vrabel, xen-devel

> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: 15 January 2014 15:25
> To: Paul Durrant
> Cc: netdev@vger.kernel.org; xen-devel@lists.xen.org; Boris Ostrovsky; David
> Vrabel
> Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: add support for
> IPv6 offloads
> 
> On 15/01/14 15:18, Paul Durrant wrote:
> > This patch adds support for IPv6 checksum offload and GSO when those
> > features are available in the backend.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > Cc: David Vrabel <david.vrabel@citrix.com>
> > ---
> >  drivers/net/xen-netfront.c |   48
> +++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 43 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> > index c41537b..321759f 100644
> > --- a/drivers/net/xen-netfront.c
> > +++ b/drivers/net/xen-netfront.c
> > @@ -617,7 +617,9 @@ static int xennet_start_xmit(struct sk_buff *skb,
> struct net_device *dev)
> >  		tx->flags |= XEN_NETTXF_extra_info;
> >
> >  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> > -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> > +		gso->u.gso.type = (skb_shinfo(skb)->gso_type &
> SKB_GSO_TCPV6) ?
> > +			XEN_NETIF_GSO_TYPE_TCPV6 :
> > +			XEN_NETIF_GSO_TYPE_TCPV4;
> >  		gso->u.gso.pad = 0;
> >  		gso->u.gso.features = 0;
> >
> > @@ -809,15 +811,18 @@ static int xennet_set_skb_gso(struct sk_buff
> *skb,
> >  		return -EINVAL;
> >  	}
> >
> > -	/* Currently only TCPv4 S.O. is supported. */
> > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> > +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
> >  		if (net_ratelimit())
> >  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
> >  		return -EINVAL;
> >  	}
> >
> >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > +	skb_shinfo(skb)->gso_type =
> > +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> > +		SKB_GSO_TCPV4 :
> > +		SKB_GSO_TCPV6;
> >
> >  	/* Header must be checked, and gso_segs computed. */
> >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > @@ -1191,6 +1196,15 @@ static netdev_features_t
> xennet_fix_features(struct net_device *dev,
> >  			features &= ~NETIF_F_SG;
> >  	}
> >
> > +	if (features & NETIF_F_IPV6_CSUM) {
> > +		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> > +				 "feature-ipv6-csum-offload", "%d", &val) <
> 0)
> > +			val = 0;
> > +
> > +		if (!val)
> > +			features &= ~NETIF_F_IPV6_CSUM;
> > +	}
> > +
> >  	if (features & NETIF_F_TSO) {
> >  		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> >  				 "feature-gso-tcpv4", "%d", &val) < 0)
> > @@ -1200,6 +1214,15 @@ static netdev_features_t
> xennet_fix_features(struct net_device *dev,
> >  			features &= ~NETIF_F_TSO;
> >  	}
> >
> > +	if (features & NETIF_F_TSO6) {
> > +		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> > +				 "feature-gso-tcpv6", "%d", &val) < 0)
> > +			val = 0;
> > +
> > +		if (!val)
> > +			features &= ~NETIF_F_TSO6;
> > +	}
> > +
> >  	return features;
> >  }
> >
> > @@ -1338,7 +1361,9 @@ static struct net_device
> *xennet_create_dev(struct xenbus_device *dev)
> >  	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
> >  	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
> >  				  NETIF_F_GSO_ROBUST;
> > -	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG |
> NETIF_F_TSO;
> > +	netdev->hw_features	= NETIF_F_SG |
> > +				  NETIF_F_IPV6_CSUM |
> > +				  NETIF_F_TSO | NETIF_F_TSO6;
> >
> >  	/*
> >           * Assume that all hw features are available for now. This set
> > @@ -1716,6 +1741,19 @@ again:
> >  		goto abort_transaction;
> >  	}
> >
> > +	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
> "%d", 1);
> 
> "%d", 1 results in a constant string.  xenbus_write() would avoid a
> transitory memory allocation.
> 
> ~Andrew
> 

This code is consistent with all the other xenbus_printf()s in the neighbourhood and does it really matter?

  Paul

> > +	if (err) {
> > +		message = "writing feature-gso-tcpv6";
> > +		goto abort_transaction;
> > +	}
> > +
> > +	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-
> offload",
> > +			    "%d", 1);
> > +	if (err) {
> > +		message = "writing feature-ipv6-csum-offload";
> > +		goto abort_transaction;
> > +	}
> > +
> >  	err = xenbus_transaction_end(xbt, 0);
> >  	if (err) {
> >  		if (err == -EAGAIN)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next] xen-netfront: add support for IPv6 offloads
  2014-01-15 15:18 [PATCH net-next] xen-netfront: add " Paul Durrant
  2014-01-15 15:25 ` [Xen-devel] " Andrew Cooper
@ 2014-01-15 15:25 ` Andrew Cooper
  1 sibling, 0 replies; 13+ messages in thread
From: Andrew Cooper @ 2014-01-15 15:25 UTC (permalink / raw)
  To: Paul Durrant; +Cc: netdev, Boris Ostrovsky, David Vrabel, xen-devel

On 15/01/14 15:18, Paul Durrant wrote:
> This patch adds support for IPv6 checksum offload and GSO when those
> features are available in the backend.
>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> ---
>  drivers/net/xen-netfront.c |   48 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index c41537b..321759f 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -617,7 +617,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  		tx->flags |= XEN_NETTXF_extra_info;
>  
>  		gso->u.gso.size = skb_shinfo(skb)->gso_size;
> -		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> +		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
> +			XEN_NETIF_GSO_TYPE_TCPV6 :
> +			XEN_NETIF_GSO_TYPE_TCPV4;
>  		gso->u.gso.pad = 0;
>  		gso->u.gso.features = 0;
>  
> @@ -809,15 +811,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
>  		return -EINVAL;
>  	}
>  
> -	/* Currently only TCPv4 S.O. is supported. */
> -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> +	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
> +	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
>  		if (net_ratelimit())
>  			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
>  		return -EINVAL;
>  	}
>  
>  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> +	skb_shinfo(skb)->gso_type =
> +		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
> +		SKB_GSO_TCPV4 :
> +		SKB_GSO_TCPV6;
>  
>  	/* Header must be checked, and gso_segs computed. */
>  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> @@ -1191,6 +1196,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
>  			features &= ~NETIF_F_SG;
>  	}
>  
> +	if (features & NETIF_F_IPV6_CSUM) {
> +		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> +				 "feature-ipv6-csum-offload", "%d", &val) < 0)
> +			val = 0;
> +
> +		if (!val)
> +			features &= ~NETIF_F_IPV6_CSUM;
> +	}
> +
>  	if (features & NETIF_F_TSO) {
>  		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
>  				 "feature-gso-tcpv4", "%d", &val) < 0)
> @@ -1200,6 +1214,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
>  			features &= ~NETIF_F_TSO;
>  	}
>  
> +	if (features & NETIF_F_TSO6) {
> +		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> +				 "feature-gso-tcpv6", "%d", &val) < 0)
> +			val = 0;
> +
> +		if (!val)
> +			features &= ~NETIF_F_TSO6;
> +	}
> +
>  	return features;
>  }
>  
> @@ -1338,7 +1361,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
>  	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
>  	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>  				  NETIF_F_GSO_ROBUST;
> -	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
> +	netdev->hw_features	= NETIF_F_SG |
> +				  NETIF_F_IPV6_CSUM |
> +				  NETIF_F_TSO | NETIF_F_TSO6;
>  
>  	/*
>           * Assume that all hw features are available for now. This set
> @@ -1716,6 +1741,19 @@ again:
>  		goto abort_transaction;
>  	}
>  
> +	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);

"%d", 1 results in a constant string.  xenbus_write() would avoid a
transitory memory allocation.

~Andrew

> +	if (err) {
> +		message = "writing feature-gso-tcpv6";
> +		goto abort_transaction;
> +	}
> +
> +	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload",
> +			    "%d", 1);
> +	if (err) {
> +		message = "writing feature-ipv6-csum-offload";
> +		goto abort_transaction;
> +	}
> +
>  	err = xenbus_transaction_end(xbt, 0);
>  	if (err) {
>  		if (err == -EAGAIN)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH net-next] xen-netfront: add support for IPv6 offloads
@ 2014-01-15 15:18 Paul Durrant
  2014-01-15 15:25 ` [Xen-devel] " Andrew Cooper
  2014-01-15 15:25 ` Andrew Cooper
  0 siblings, 2 replies; 13+ messages in thread
From: Paul Durrant @ 2014-01-15 15:18 UTC (permalink / raw)
  To: netdev, xen-devel
  Cc: Paul Durrant, Konrad Rzeszutek Wilk, Boris Ostrovsky, David Vrabel

This patch adds support for IPv6 checksum offload and GSO when those
features are available in the backend.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netfront.c |   48 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c41537b..321759f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -617,7 +617,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
-		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+			XEN_NETIF_GSO_TYPE_TCPV6 :
+			XEN_NETIF_GSO_TYPE_TCPV4;
 		gso->u.gso.pad = 0;
 		gso->u.gso.features = 0;
 
@@ -809,15 +811,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 		if (net_ratelimit())
 			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type =
+		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+		SKB_GSO_TCPV4 :
+		SKB_GSO_TCPV6;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -1191,6 +1196,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_SG;
 	}
 
+	if (features & NETIF_F_IPV6_CSUM) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-ipv6-csum-offload", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_IPV6_CSUM;
+	}
+
 	if (features & NETIF_F_TSO) {
 		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
 				 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1200,6 +1214,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_TSO;
 	}
 
+	if (features & NETIF_F_TSO6) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-gso-tcpv6", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_TSO6;
+	}
+
 	return features;
 }
 
@@ -1338,7 +1361,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				  NETIF_F_GSO_ROBUST;
-	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+	netdev->hw_features	= NETIF_F_SG |
+				  NETIF_F_IPV6_CSUM |
+				  NETIF_F_TSO | NETIF_F_TSO6;
 
 	/*
          * Assume that all hw features are available for now. This set
@@ -1716,6 +1741,19 @@ again:
 		goto abort_transaction;
 	}
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload",
+			    "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
 		if (err == -EAGAIN)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH net-next] xen-netfront: add support for IPv6 offloads
@ 2014-01-15 15:18 Paul Durrant
  0 siblings, 0 replies; 13+ messages in thread
From: Paul Durrant @ 2014-01-15 15:18 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Boris Ostrovsky, Paul Durrant, David Vrabel

This patch adds support for IPv6 checksum offload and GSO when those
features are available in the backend.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netfront.c |   48 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c41537b..321759f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -617,7 +617,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
-		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+			XEN_NETIF_GSO_TYPE_TCPV6 :
+			XEN_NETIF_GSO_TYPE_TCPV4;
 		gso->u.gso.pad = 0;
 		gso->u.gso.features = 0;
 
@@ -809,15 +811,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+	    gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 		if (net_ratelimit())
 			pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb)->gso_type =
+		(gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+		SKB_GSO_TCPV4 :
+		SKB_GSO_TCPV6;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -1191,6 +1196,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_SG;
 	}
 
+	if (features & NETIF_F_IPV6_CSUM) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-ipv6-csum-offload", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_IPV6_CSUM;
+	}
+
 	if (features & NETIF_F_TSO) {
 		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
 				 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1200,6 +1214,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev,
 			features &= ~NETIF_F_TSO;
 	}
 
+	if (features & NETIF_F_TSO6) {
+		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+				 "feature-gso-tcpv6", "%d", &val) < 0)
+			val = 0;
+
+		if (!val)
+			features &= ~NETIF_F_TSO6;
+	}
+
 	return features;
 }
 
@@ -1338,7 +1361,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				  NETIF_F_GSO_ROBUST;
-	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+	netdev->hw_features	= NETIF_F_SG |
+				  NETIF_F_IPV6_CSUM |
+				  NETIF_F_TSO | NETIF_F_TSO6;
 
 	/*
          * Assume that all hw features are available for now. This set
@@ -1716,6 +1741,19 @@ again:
 		goto abort_transaction;
 	}
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload",
+			    "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
 		if (err == -EAGAIN)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-01-15 17:08 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-11-05 11:59 [PATCH net-next] xen-netfront: Add support for IPv6 offloads Paul Durrant
2013-11-07 10:48 ` Ian Campbell
2013-11-07 11:02   ` Paul Durrant
2013-11-07 11:49     ` Ian Campbell
2013-11-15 11:19     ` Paul Durrant
2013-11-19 10:28       ` Ian Campbell
2013-11-19 11:24         ` Paul Durrant
2014-01-15 15:18 [PATCH net-next] xen-netfront: add " Paul Durrant
2014-01-15 15:25 ` [Xen-devel] " Andrew Cooper
2014-01-15 15:54   ` Paul Durrant
2014-01-15 15:54   ` [Xen-devel] " Paul Durrant
2014-01-15 16:03     ` Jan Beulich
2014-01-15 17:08       ` Paul Durrant
2014-01-15 16:03     ` Jan Beulich
2014-01-15 15:25 ` Andrew Cooper
2014-01-15 15:18 Paul Durrant

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.