All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 2/3] myri10ge: Add vlan rx for better GRO perf.
@ 2012-11-14 13:06 Andrew Gallatin
  2012-11-14 14:46 ` Eric Dumazet
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Gallatin @ 2012-11-14 13:06 UTC (permalink / raw)
  To: netdev



Unlike LRO, GRO requires that vlan tags be removed before
aggregation can occur.  Since the myri10ge NIC does not support
hardware vlan tag offload, we must remove the tag in the driver
to achieve performance comparable to LRO for vlan tagged frames.

Signed-off-by: Andrew Gallatin <gallatin@myri.com>
---
  drivers/net/ethernet/myricom/myri10ge/myri10ge.c |   47 
++++++++++++++++++++++
  1 file changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c 
b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index a5ab2f2..b9b6dfd 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1264,6 +1264,48 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
  	}
  }

+/*
+ * GRO does not support acceleration of tagged vlan frames, and
+ * this NIC does not support vlan tag offload, so we must pop
+ * the tag ourselves to be able to achieve GRO performance that
+ * is comparable to LRO.
+ */
+
+static inline void
+myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
+{
+	u8 *va;
+	struct vlan_ethhdr *veh;
+	struct ethhdr *eh;
+	struct skb_frag_struct *frag;
+	u16 proto;
+
+	va = addr;
+	va += MXGEFW_PAD;
+	veh = (struct vlan_ethhdr *) va;
+	if ((dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_GRO)) ==
+	    (NETIF_F_HW_VLAN_RX | NETIF_F_GRO) &&
+	    (veh->h_vlan_proto == ntohs(ETH_P_8021Q))) {
+		/* fixup csum if needed */
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_sub(skb->csum,
+					     csum_partial(va + ETH_HLEN,
+							  VLAN_HLEN, 0));
+		/* pop tag */
+		__vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI));
+		proto = veh->h_vlan_encapsulated_proto;
+		memmove(va + VLAN_HLEN, va, ETH_HLEN);
+		va += VLAN_HLEN;
+		eh = (struct ethhdr *)va;
+		eh->h_proto = proto;
+		skb->len -= VLAN_HLEN;
+		skb->data_len -= VLAN_HLEN;
+		frag = skb_shinfo(skb)->frags;
+		frag->page_offset += VLAN_HLEN;
+		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
+	}
+}
+
  static inline int
  myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
  {
@@ -1329,6 +1371,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, 
int len, __wsum csum)
  		skb->ip_summed = CHECKSUM_COMPLETE;
  		skb->csum = csum;
  	}
+	myri10ge_vlan_rx(mgp->dev, va, skb);
  	skb_record_rx_queue(skb, ss - &mgp->ss[0]);

  	napi_gro_frags(&ss->napi);
@@ -3854,6 +3897,10 @@ static int myri10ge_probe(struct pci_dev *pdev, 
const struct pci_device_id *ent)
  	netdev->netdev_ops = &myri10ge_netdev_ops;
  	netdev->mtu = myri10ge_initial_mtu;
  	netdev->hw_features = mgp->features | NETIF_F_RXCSUM;
+
+	/* fake NETIF_F_HW_VLAN_RX for good GRO performance */
+	netdev->hw_features |= NETIF_F_HW_VLAN_RX;
+
  	netdev->features = netdev->hw_features;

  	if (dac_enabled)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 2/3] myri10ge: Add vlan rx for better GRO perf.
  2012-11-14 13:06 [PATCH net-next 2/3] myri10ge: Add vlan rx for better GRO perf Andrew Gallatin
@ 2012-11-14 14:46 ` Eric Dumazet
  2012-11-14 15:43   ` Andrew Gallatin
  2012-11-14 16:32   ` [PATCH v2 " Andrew Gallatin
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Dumazet @ 2012-11-14 14:46 UTC (permalink / raw)
  To: Andrew Gallatin; +Cc: netdev

On Wed, 2012-11-14 at 08:06 -0500, Andrew Gallatin wrote:
> 
> Unlike LRO, GRO requires that vlan tags be removed before
> aggregation can occur.  Since the myri10ge NIC does not support
> hardware vlan tag offload, we must remove the tag in the driver
> to achieve performance comparable to LRO for vlan tagged frames.
> 
> Signed-off-by: Andrew Gallatin <gallatin@myri.com>
> ---
>   drivers/net/ethernet/myricom/myri10ge/myri10ge.c |   47 
> ++++++++++++++++++++++
>   1 file changed, 47 insertions(+)
> 
> diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c 
> b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
> index a5ab2f2..b9b6dfd 100644
> --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
> +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
> @@ -1264,6 +1264,48 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
>   	}
>   }
> 
> +/*
> + * GRO does not support acceleration of tagged vlan frames, and
> + * this NIC does not support vlan tag offload, so we must pop
> + * the tag ourselves to be able to achieve GRO performance that
> + * is comparable to LRO.
> + */
> +
> +static inline void
> +myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
> +{
> +	u8 *va;
> +	struct vlan_ethhdr *veh;
> +	struct ethhdr *eh;
> +	struct skb_frag_struct *frag;
> +	u16 proto;
> +
> +	va = addr;
> +	va += MXGEFW_PAD;
> +	veh = (struct vlan_ethhdr *) va;
> +	if ((dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_GRO)) ==
> +	    (NETIF_F_HW_VLAN_RX | NETIF_F_GRO) &&
> +	    (veh->h_vlan_proto == ntohs(ETH_P_8021Q))) {
> +		/* fixup csum if needed */
> +		if (skb->ip_summed == CHECKSUM_COMPLETE)
> +			skb->csum = csum_sub(skb->csum,
> +					     csum_partial(va + ETH_HLEN,
> +							  VLAN_HLEN, 0));
> +		/* pop tag */
> +		__vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI));
> +		proto = veh->h_vlan_encapsulated_proto;
I am not sure you need this @proto ?

> +		memmove(va + VLAN_HLEN, va, ETH_HLEN);

You could only memmove the mac addresses (2 * ETH_ALEN)
To not touch the proto (and avoid possible aliasing problems)

> +		va += VLAN_HLEN;
> +		eh = (struct ethhdr *)va;
> +		eh->h_proto = proto;

and this should not be needed ?


> +		skb->len -= VLAN_HLEN;
> +		skb->data_len -= VLAN_HLEN;
> +		frag = skb_shinfo(skb)->frags;

> +		frag->page_offset += VLAN_HLEN;
> +		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
> +	}
> +}
> +

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next 2/3] myri10ge: Add vlan rx for better GRO perf.
  2012-11-14 14:46 ` Eric Dumazet
@ 2012-11-14 15:43   ` Andrew Gallatin
  2012-11-14 16:32   ` [PATCH v2 " Andrew Gallatin
  1 sibling, 0 replies; 4+ messages in thread
From: Andrew Gallatin @ 2012-11-14 15:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

On 11/14/12 09:46, Eric Dumazet wrote:
> On Wed, 2012-11-14 at 08:06 -0500, Andrew Gallatin wrote:
>>
>> Unlike LRO, GRO requires that vlan tags be removed before
>> aggregation can occur.  Since the myri10ge NIC does not support
>> hardware vlan tag offload, we must remove the tag in the driver
>> to achieve performance comparable to LRO for vlan tagged frames.
>>
>> Signed-off-by: Andrew Gallatin <gallatin@myri.com>
>> ---
>>    drivers/net/ethernet/myricom/myri10ge/myri10ge.c |   47
>> ++++++++++++++++++++++
>>    1 file changed, 47 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
>> b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
>> index a5ab2f2..b9b6dfd 100644
>> --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
>> +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
>> @@ -1264,6 +1264,48 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
>>    	}
>>    }
>>
>> +/*
>> + * GRO does not support acceleration of tagged vlan frames, and
>> + * this NIC does not support vlan tag offload, so we must pop
>> + * the tag ourselves to be able to achieve GRO performance that
>> + * is comparable to LRO.
>> + */
>> +
>> +static inline void
>> +myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
>> +{
>> +	u8 *va;
>> +	struct vlan_ethhdr *veh;
>> +	struct ethhdr *eh;
>> +	struct skb_frag_struct *frag;
>> +	u16 proto;
>> +
>> +	va = addr;
>> +	va += MXGEFW_PAD;
>> +	veh = (struct vlan_ethhdr *) va;
>> +	if ((dev->features & (NETIF_F_HW_VLAN_RX | NETIF_F_GRO)) ==
>> +	    (NETIF_F_HW_VLAN_RX | NETIF_F_GRO) &&
>> +	    (veh->h_vlan_proto == ntohs(ETH_P_8021Q))) {
>> +		/* fixup csum if needed */
>> +		if (skb->ip_summed == CHECKSUM_COMPLETE)
>> +			skb->csum = csum_sub(skb->csum,
>> +					     csum_partial(va + ETH_HLEN,
>> +							  VLAN_HLEN, 0));
>> +		/* pop tag */
>> +		__vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI));
>> +		proto = veh->h_vlan_encapsulated_proto;
> I am not sure you need this @proto ?
>
>> +		memmove(va + VLAN_HLEN, va, ETH_HLEN);
>
> You could only memmove the mac addresses (2 * ETH_ALEN)
> To not touch the proto (and avoid possible aliasing problems)
>
>> +		va += VLAN_HLEN;
>> +		eh = (struct ethhdr *)va;
>> +		eh->h_proto = proto;
>
> and this should not be needed ?

Indeed, your suggestion works and is simpler and less risky.
Thank you for your help.

I also think that I am making a mistake by only popping the tag when
GRO is enabled.  My fear is that something will become confused when
skb->dev->features contains NETIF_F_HW_VLAN_RX, but the tag is not
decap'ed.  So I will remove the check for NETIF_F_GRO when popping
the vlan tag.

Thanks,

Drew

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 net-next 2/3] myri10ge: Add vlan rx for better GRO perf.
  2012-11-14 14:46 ` Eric Dumazet
  2012-11-14 15:43   ` Andrew Gallatin
@ 2012-11-14 16:32   ` Andrew Gallatin
  1 sibling, 0 replies; 4+ messages in thread
From: Andrew Gallatin @ 2012-11-14 16:32 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet


Unlike LRO, GRO requires that vlan tags be removed before
aggregation can occur.  Since the myri10ge NIC does not support
hardware vlan tag offload, we must remove the tag in the driver
to achieve performance comparable to LRO for vlan tagged frames.

Updated with change suggested by Eric Dumazet to simplify the vlan
tag popping & a change by me to always pop tags when
NETIF_F_HW_VLAN_RX is set.

Signed-off-by: Andrew Gallatin <gallatin@myri.com>
---
  drivers/net/ethernet/myricom/myri10ge/myri10ge.c |   40 
++++++++++++++++++++++
  1 file changed, 40 insertions(+)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c 
b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index a5ab2f2..93ed089 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1264,6 +1264,41 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
  	}
  }

+/*
+ * GRO does not support acceleration of tagged vlan frames, and
+ * this NIC does not support vlan tag offload, so we must pop
+ * the tag ourselves to be able to achieve GRO performance that
+ * is comparable to LRO.
+ */
+
+static inline void
+myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
+{
+	u8 *va;
+	struct vlan_ethhdr *veh;
+	struct skb_frag_struct *frag;
+
+	va = addr;
+	va += MXGEFW_PAD;
+	veh = (struct vlan_ethhdr *) va;
+	if ((dev->features & (NETIF_F_HW_VLAN_RX)) == NETIF_F_HW_VLAN_RX &&
+	    (veh->h_vlan_proto == ntohs(ETH_P_8021Q))) {
+		/* fixup csum if needed */
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_sub(skb->csum,
+					     csum_partial(va + ETH_HLEN,
+							  VLAN_HLEN, 0));
+		/* pop tag */
+		__vlan_hwaccel_put_tag(skb, ntohs(veh->h_vlan_TCI));
+		memmove(va + VLAN_HLEN, va, 2 * ETH_ALEN);
+		skb->len -= VLAN_HLEN;
+		skb->data_len -= VLAN_HLEN;
+		frag = skb_shinfo(skb)->frags;
+		frag->page_offset += VLAN_HLEN;
+		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
+	}
+}
+
  static inline int
  myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
  {
@@ -1329,6 +1364,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, 
int len, __wsum csum)
  		skb->ip_summed = CHECKSUM_COMPLETE;
  		skb->csum = csum;
  	}
+	myri10ge_vlan_rx(mgp->dev, va, skb);
  	skb_record_rx_queue(skb, ss - &mgp->ss[0]);

  	napi_gro_frags(&ss->napi);
@@ -3854,6 +3890,10 @@ static int myri10ge_probe(struct pci_dev *pdev, 
const struct pci_device_id *ent)
  	netdev->netdev_ops = &myri10ge_netdev_ops;
  	netdev->mtu = myri10ge_initial_mtu;
  	netdev->hw_features = mgp->features | NETIF_F_RXCSUM;
+
+	/* fake NETIF_F_HW_VLAN_RX for good GRO performance */
+	netdev->hw_features |= NETIF_F_HW_VLAN_RX;
+
  	netdev->features = netdev->hw_features;

  	if (dac_enabled)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-11-14 16:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-14 13:06 [PATCH net-next 2/3] myri10ge: Add vlan rx for better GRO perf Andrew Gallatin
2012-11-14 14:46 ` Eric Dumazet
2012-11-14 15:43   ` Andrew Gallatin
2012-11-14 16:32   ` [PATCH v2 " Andrew Gallatin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.