All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NIU support for skb->rxhash
@ 2010-04-22 11:21 David Miller
  2010-04-22 11:43 ` Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: David Miller @ 2010-04-22 11:21 UTC (permalink / raw)
  To: netdev


But it turns out using it is largely pointless since the only way to
get the hash value(s) is through a structure which is prepended to the
packet data (so we take a cache miss on the packet data anyways)
instead of being able to fetch it out of the RX descriptors :-/

If anyone out there is trying to design sane hardware, please put the
following into your RX descriptors:

1) ethernet protocol type (u16)
2) a flag bit indicating if the packet destination matched one
   of the programmed unicast MAC addresses
3) a flag bit indicating "multicast"
4) a flag bit indicating "broadcast"
5) at least 32-bits of the computed flow hash (u32)

kthx, bye!

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 493e25c..f8ee985 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -36,8 +36,8 @@
 #include "niu.h"
 
 #define DRV_MODULE_NAME		"niu"
-#define DRV_MODULE_VERSION	"1.0"
-#define DRV_MODULE_RELDATE	"Nov 14, 2008"
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"Apr 22, 2010"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -3444,6 +3444,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 			      struct rx_ring_info *rp)
 {
 	unsigned int index = rp->rcr_index;
+	struct rx_pkt_hdr1 *rh;
 	struct sk_buff *skb;
 	int len, num_rcr;
 
@@ -3477,9 +3478,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 		if (num_rcr == 1) {
 			int ptype;
 
-			off += 2;
-			append_size -= 2;
-
 			ptype = (val >> RCR_ENTRY_PKT_TYPE_SHIFT);
 			if ((ptype == RCR_PKT_TYPE_TCP ||
 			     ptype == RCR_PKT_TYPE_UDP) &&
@@ -3488,8 +3486,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 			else
 				skb->ip_summed = CHECKSUM_NONE;
-		}
-		if (!(val & RCR_ENTRY_MULTI))
+		} else if (!(val & RCR_ENTRY_MULTI))
 			append_size = len - skb->len;
 
 		niu_rx_skb_append(skb, page, off, append_size);
@@ -3510,8 +3507,16 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 	}
 	rp->rcr_index = index;
 
-	skb_reserve(skb, NET_IP_ALIGN);
-	__pskb_pull_tail(skb, min(len, VLAN_ETH_HLEN));
+	len += sizeof(*rh);
+	len = min_t(int, len, sizeof(*rh) + VLAN_ETH_HLEN);
+	__pskb_pull_tail(skb, len);
+
+	rh = (struct rx_pkt_hdr1 *) skb->data;
+	skb->rxhash = ((u32)rh->hashval2_0 << 24 |
+		       (u32)rh->hashval2_1 << 16 |
+		       (u32)rh->hashval1_1 << 8 |
+		       (u32)rh->hashval1_2 << 0);
+	skb_pull(skb, sizeof(*rh));
 
 	rp->rx_packets++;
 	rp->rx_bytes += skb->len;
@@ -4946,7 +4951,9 @@ static int niu_init_one_rx_channel(struct niu *np, struct rx_ring_info *rp)
 	      RX_DMA_CTL_STAT_RCRTO |
 	      RX_DMA_CTL_STAT_RBR_EMPTY));
 	nw64(RXDMA_CFIG1(channel), rp->mbox_dma >> 32);
-	nw64(RXDMA_CFIG2(channel), (rp->mbox_dma & 0x00000000ffffffc0));
+	nw64(RXDMA_CFIG2(channel),
+	     ((rp->mbox_dma & RXDMA_CFIG2_MBADDR_L) |
+	      RXDMA_CFIG2_FULL_HDR));
 	nw64(RBR_CFIG_A(channel),
 	     ((u64)rp->rbr_table_size << RBR_CFIG_A_LEN_SHIFT) |
 	     (rp->rbr_dma & (RBR_CFIG_A_STADDR_BASE | RBR_CFIG_A_STADDR)));
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index 3bd0b59..d671546 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -2706,7 +2706,7 @@ struct rx_pkt_hdr0 {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u8	inputport:2,
 		maccheck:1,
-		class:4;
+		class:5;
 	u8	vlan:1,
 		llcsnap:1,
 		noport:1,
@@ -2715,7 +2715,7 @@ struct rx_pkt_hdr0 {
 		tres:2,
 		tzfvld:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
-	u8	class:4,
+	u8	class:5,
 		maccheck:1,
 		inputport:2;
 	u8	tzfvld:1,
@@ -2775,6 +2775,9 @@ struct rx_pkt_hdr1 {
 	/* Bits 7:0 of hash value, H1.  */
 	u8	hashval1_2;
 
+	u8	hwrsvd5;
+	u8	hwrsvd6;
+
 	u8	usrdata_0;	/* Bits 39:32 of user data.  */
 	u8	usrdata_1;	/* Bits 31:24 of user data.  */
 	u8	usrdata_2;	/* Bits 23:16 of user data.  */

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 11:21 [PATCH] NIU support for skb->rxhash David Miller
@ 2010-04-22 11:43 ` Eric Dumazet
  2010-04-22 21:19   ` David Miller
  2010-04-22 16:21 ` Stephen Hemminger
  2010-04-22 22:53 ` David Miller
  2 siblings, 1 reply; 12+ messages in thread
From: Eric Dumazet @ 2010-04-22 11:43 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Le jeudi 22 avril 2010 à 04:21 -0700, David Miller a écrit :
> But it turns out using it is largely pointless since the only way to
> get the hash value(s) is through a structure which is prepended to the
> packet data (so we take a cache miss on the packet data anyways)
> instead of being able to fetch it out of the RX descriptors :-/
> 
> If anyone out there is trying to design sane hardware, please put the
> following into your RX descriptors:
> 
> 1) ethernet protocol type (u16)
> 2) a flag bit indicating if the packet destination matched one
>    of the programmed unicast MAC addresses
> 3) a flag bit indicating "multicast"
> 4) a flag bit indicating "broadcast"
> 5) at least 32-bits of the computed flow hash (u32)
> 
> kthx, bye!

Then, our stack also touch all 256 bytes of skb structure itself.

offsetof(struct sk_buff, next)    =0x0
offsetof(struct sk_buff, rxhash)  =0xa8
offsetof(struct sk_buff, dev)     =0x20
offsetof(struct sk_buff, len)     =0x68
offsetof(struct sk_buff, protocol)=0x7e
offsetof(struct sk_buff, network_header)=0xc0
offsetof(struct sk_buff, data)    =0xd8
offsetof(struct sk_buff, head)    =0xd0

Time for a reordering I guess ;)



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 11:21 [PATCH] NIU support for skb->rxhash David Miller
  2010-04-22 11:43 ` Eric Dumazet
@ 2010-04-22 16:21 ` Stephen Hemminger
  2010-04-22 21:36   ` David Miller
  2010-04-22 21:37   ` David Miller
  2010-04-22 22:53 ` David Miller
  2 siblings, 2 replies; 12+ messages in thread
From: Stephen Hemminger @ 2010-04-22 16:21 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Thu, 22 Apr 2010 04:21:57 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> 
> But it turns out using it is largely pointless since the only way to
> get the hash value(s) is through a structure which is prepended to the
> packet data (so we take a cache miss on the packet data anyways)
> instead of being able to fetch it out of the RX descriptors :-/
> 
> If anyone out there is trying to design sane hardware, please put the
> following into your RX descriptors:
> 
> 1) ethernet protocol type (u16)
> 2) a flag bit indicating if the packet destination matched one
>    of the programmed unicast MAC addresses
> 3) a flag bit indicating "multicast"
> 4) a flag bit indicating "broadcast"
> 5) at least 32-bits of the computed flow hash (u32)
> 
> kthx, bye!
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

Could you make configurable via ethtool like I did for sky2.

P.s: where is that patch seems lost in patchwork

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 11:43 ` Eric Dumazet
@ 2010-04-22 21:19   ` David Miller
  2010-04-23  8:14     ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2010-04-22 21:19 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 22 Apr 2010 13:43:18 +0200

> Then, our stack also touch all 256 bytes of skb structure itself.
> 
> offsetof(struct sk_buff, next)    =0x0
> offsetof(struct sk_buff, rxhash)  =0xa8
> offsetof(struct sk_buff, dev)     =0x20
> offsetof(struct sk_buff, len)     =0x68
> offsetof(struct sk_buff, protocol)=0x7e
> offsetof(struct sk_buff, network_header)=0xc0
> offsetof(struct sk_buff, data)    =0xd8
> offsetof(struct sk_buff, head)    =0xd0
> 
> Time for a reordering I guess ;)

Indeed.

Also I have some ideas about what we can do if we have
just the rxhash.  It seems we can avoid the type_trans
overhead on the interrupting cpu.

Things like eth_type_trans() become a netdev operation rather than
something drivers statically call by hand. ->ndo_type_trans or
similar.

SKB has a state bit saying whether ->ndo_type_trans has been invoked
yet on RX.

Drivers pass raw SKBs up into the stack.

We defer the ->ndo_type_trans as far as possible, for RPS when we have
->rxhash we can defer this all the way to the destination RPS cpu.

If we lack ->rxhash, the source cpu will need to invoke
->ndo_type_trans before it can begin parsing the packet.

Anyways, something like that.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 16:21 ` Stephen Hemminger
@ 2010-04-22 21:36   ` David Miller
  2010-04-22 22:11     ` Stephen Hemminger
  2010-04-22 21:37   ` David Miller
  1 sibling, 1 reply; 12+ messages in thread
From: David Miller @ 2010-04-22 21:36 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 22 Apr 2010 09:21:20 -0700

> P.s: where is that patch seems lost in patchwork

I marked it as RFC since that's what it was.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 16:21 ` Stephen Hemminger
  2010-04-22 21:36   ` David Miller
@ 2010-04-22 21:37   ` David Miller
  1 sibling, 0 replies; 12+ messages in thread
From: David Miller @ 2010-04-22 21:37 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 22 Apr 2010 09:21:20 -0700

> Could you make configurable via ethtool like I did for sky2.

I'd much rather not.

I don't want to have two code different paths, extra conditionals,
etc. here.  And there's no need to really, it's cheap enough.

I think drivers should unconditionally provide ->rxhash if they
reasonably can, otherwide don't bother at all.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 21:36   ` David Miller
@ 2010-04-22 22:11     ` Stephen Hemminger
  2010-04-22 22:24       ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2010-04-22 22:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Thu, 22 Apr 2010 14:36:06 -0700 (PDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Thu, 22 Apr 2010 09:21:20 -0700
> 
> > P.s: where is that patch seems lost in patchwork
> 
> I marked it as RFC since that's what it was.


It works fine, put it net-next

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 22:11     ` Stephen Hemminger
@ 2010-04-22 22:24       ` David Miller
  0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2010-04-22 22:24 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 22 Apr 2010 15:11:40 -0700

> On Thu, 22 Apr 2010 14:36:06 -0700 (PDT)
> David Miller <davem@davemloft.net> wrote:
> 
>> From: Stephen Hemminger <shemminger@vyatta.com>
>> Date: Thu, 22 Apr 2010 09:21:20 -0700
>> 
>> > P.s: where is that patch seems lost in patchwork
>> 
>> I marked it as RFC since that's what it was.
> 
> 
> It works fine, put it net-next

There is nothing CONFIG_RPS dependent about ->rxhash, we could
use it for many other things.

Please take away that CONFIG_RPS ifdef and I'll apply it.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 11:21 [PATCH] NIU support for skb->rxhash David Miller
  2010-04-22 11:43 ` Eric Dumazet
  2010-04-22 16:21 ` Stephen Hemminger
@ 2010-04-22 22:53 ` David Miller
  2 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2010-04-22 22:53 UTC (permalink / raw)
  To: netdev


Ok, based upon Stephen's feedback I added the ethtool bits,
here is the final version I committed to net-next-2.6:

--------------------
niu: Add skb->rxhash support.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/niu.c |   52 ++++++++++++++++++++++++++++++++++++++++------------
 drivers/net/niu.h |    7 +++++--
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 493e25c..30abb4e 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -36,8 +36,8 @@
 #include "niu.h"
 
 #define DRV_MODULE_NAME		"niu"
-#define DRV_MODULE_VERSION	"1.0"
-#define DRV_MODULE_RELDATE	"Nov 14, 2008"
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"Apr 22, 2010"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -3444,6 +3444,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 			      struct rx_ring_info *rp)
 {
 	unsigned int index = rp->rcr_index;
+	struct rx_pkt_hdr1 *rh;
 	struct sk_buff *skb;
 	int len, num_rcr;
 
@@ -3477,9 +3478,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 		if (num_rcr == 1) {
 			int ptype;
 
-			off += 2;
-			append_size -= 2;
-
 			ptype = (val >> RCR_ENTRY_PKT_TYPE_SHIFT);
 			if ((ptype == RCR_PKT_TYPE_TCP ||
 			     ptype == RCR_PKT_TYPE_UDP) &&
@@ -3488,8 +3486,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 			else
 				skb->ip_summed = CHECKSUM_NONE;
-		}
-		if (!(val & RCR_ENTRY_MULTI))
+		} else if (!(val & RCR_ENTRY_MULTI))
 			append_size = len - skb->len;
 
 		niu_rx_skb_append(skb, page, off, append_size);
@@ -3510,8 +3507,17 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np,
 	}
 	rp->rcr_index = index;
 
-	skb_reserve(skb, NET_IP_ALIGN);
-	__pskb_pull_tail(skb, min(len, VLAN_ETH_HLEN));
+	len += sizeof(*rh);
+	len = min_t(int, len, sizeof(*rh) + VLAN_ETH_HLEN);
+	__pskb_pull_tail(skb, len);
+
+	rh = (struct rx_pkt_hdr1 *) skb->data;
+	if (np->dev->features & NETIF_F_RXHASH)
+		skb->rxhash = ((u32)rh->hashval2_0 << 24 |
+			       (u32)rh->hashval2_1 << 16 |
+			       (u32)rh->hashval1_1 << 8 |
+			       (u32)rh->hashval1_2 << 0);
+	skb_pull(skb, sizeof(*rh));
 
 	rp->rx_packets++;
 	rp->rx_bytes += skb->len;
@@ -4946,7 +4952,9 @@ static int niu_init_one_rx_channel(struct niu *np, struct rx_ring_info *rp)
 	      RX_DMA_CTL_STAT_RCRTO |
 	      RX_DMA_CTL_STAT_RBR_EMPTY));
 	nw64(RXDMA_CFIG1(channel), rp->mbox_dma >> 32);
-	nw64(RXDMA_CFIG2(channel), (rp->mbox_dma & 0x00000000ffffffc0));
+	nw64(RXDMA_CFIG2(channel),
+	     ((rp->mbox_dma & RXDMA_CFIG2_MBADDR_L) |
+	      RXDMA_CFIG2_FULL_HDR));
 	nw64(RBR_CFIG_A(channel),
 	     ((u64)rp->rbr_table_size << RBR_CFIG_A_LEN_SHIFT) |
 	     (rp->rbr_dma & (RBR_CFIG_A_STADDR_BASE | RBR_CFIG_A_STADDR)));
@@ -7910,6 +7918,18 @@ static int niu_phys_id(struct net_device *dev, u32 data)
 	return 0;
 }
 
+static int niu_set_flags(struct net_device *dev, u32 data)
+{
+	if (data & (ETH_FLAG_LRO | ETH_FLAG_NTUPLE))
+		return -EOPNOTSUPP;
+
+	if (data & ETH_FLAG_RXHASH)
+		dev->features |= NETIF_F_RXHASH;
+	else
+		dev->features &= ~NETIF_F_RXHASH;
+	return 0;
+}
+
 static const struct ethtool_ops niu_ethtool_ops = {
 	.get_drvinfo		= niu_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -7926,6 +7946,8 @@ static const struct ethtool_ops niu_ethtool_ops = {
 	.phys_id		= niu_phys_id,
 	.get_rxnfc		= niu_get_nfc,
 	.set_rxnfc		= niu_set_nfc,
+	.set_flags		= niu_set_flags,
+	.get_flags		= ethtool_op_get_flags,
 };
 
 static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent,
@@ -9754,6 +9776,12 @@ static void __devinit niu_device_announce(struct niu *np)
 	}
 }
 
+static void __devinit niu_set_basic_features(struct net_device *dev)
+{
+	dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM |
+			  NETIF_F_GRO | NETIF_F_RXHASH);
+}
+
 static int __devinit niu_pci_init_one(struct pci_dev *pdev,
 				      const struct pci_device_id *ent)
 {
@@ -9838,7 +9866,7 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev,
 		}
 	}
 
-	dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GRO);
+	niu_set_basic_features(dev);
 
 	np->regs = pci_ioremap_bar(pdev, 0);
 	if (!np->regs) {
@@ -10080,7 +10108,7 @@ static int __devinit niu_of_probe(struct of_device *op,
 		goto err_out_free_dev;
 	}
 
-	dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM);
+	niu_set_basic_features(dev);
 
 	np->regs = of_ioremap(&op->resource[1], 0,
 			      resource_size(&op->resource[1]),
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index 3bd0b59..d671546 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -2706,7 +2706,7 @@ struct rx_pkt_hdr0 {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u8	inputport:2,
 		maccheck:1,
-		class:4;
+		class:5;
 	u8	vlan:1,
 		llcsnap:1,
 		noport:1,
@@ -2715,7 +2715,7 @@ struct rx_pkt_hdr0 {
 		tres:2,
 		tzfvld:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
-	u8	class:4,
+	u8	class:5,
 		maccheck:1,
 		inputport:2;
 	u8	tzfvld:1,
@@ -2775,6 +2775,9 @@ struct rx_pkt_hdr1 {
 	/* Bits 7:0 of hash value, H1.  */
 	u8	hashval1_2;
 
+	u8	hwrsvd5;
+	u8	hwrsvd6;
+
 	u8	usrdata_0;	/* Bits 39:32 of user data.  */
 	u8	usrdata_1;	/* Bits 31:24 of user data.  */
 	u8	usrdata_2;	/* Bits 23:16 of user data.  */
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-22 21:19   ` David Miller
@ 2010-04-23  8:14     ` David Miller
  2010-04-23 15:32       ` Tom Herbert
  0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2010-04-23  8:14 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev

From: David Miller <davem@davemloft.net>
Date: Thu, 22 Apr 2010 14:19:22 -0700 (PDT)

> Also I have some ideas about what we can do if we have
> just the rxhash.  It seems we can avoid the type_trans
> overhead on the interrupting cpu.
> 
> Things like eth_type_trans() become a netdev operation rather than
> something drivers statically call by hand. ->ndo_type_trans or
> similar.
> 
> SKB has a state bit saying whether ->ndo_type_trans has been invoked
> yet on RX.
> 
> Drivers pass raw SKBs up into the stack.
> 
> We defer the ->ndo_type_trans as far as possible, for RPS when we have
> ->rxhash we can defer this all the way to the destination RPS cpu.
> 
> If we lack ->rxhash, the source cpu will need to invoke
> ->ndo_type_trans before it can begin parsing the packet.

I looked into implementing this and it doesn't work.  The
problem is GRO want's to look into the packet very early
and we want to batch GRO a set of packets into a big packet
before shooting them over to a remote cpu.

This reminds me that we can start using ->rxhash as a quick
mismatch check in the GRO flow matcher.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-23  8:14     ` David Miller
@ 2010-04-23 15:32       ` Tom Herbert
  2010-04-23 20:28         ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2010-04-23 15:32 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev

> I looked into implementing this and it doesn't work.  The
> problem is GRO want's to look into the packet very early
> and we want to batch GRO a set of packets into a big packet
> before shooting them over to a remote cpu.
>

Can you reconsider? :-)  The majority of our servers see packet loads
which don't allow for much batching (a lot of small RPC messages), so
for those GRO is mostly unnecessary overhead and mechanisms that
improve unbatched packet performance are compelling.  Also, if a
device already does LRO, I don't see that GRO could add a lot of value
anyway.

Tom

> This reminds me that we can start using ->rxhash as a quick
> mismatch check in the GRO flow matcher.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] NIU support for skb->rxhash
  2010-04-23 15:32       ` Tom Herbert
@ 2010-04-23 20:28         ` David Miller
  0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2010-04-23 20:28 UTC (permalink / raw)
  To: therbert; +Cc: eric.dumazet, netdev

From: Tom Herbert <therbert@google.com>
Date: Fri, 23 Apr 2010 08:32:02 -0700

>> I looked into implementing this and it doesn't work.  The
>> problem is GRO want's to look into the packet very early
>> and we want to batch GRO a set of packets into a big packet
>> before shooting them over to a remote cpu.
>>
> 
> Can you reconsider? :-)  The majority of our servers see packet loads
> which don't allow for much batching (a lot of small RPC messages), so
> for those GRO is mostly unnecessary overhead and mechanisms that
> improve unbatched packet performance are compelling.  Also, if a
> device already does LRO, I don't see that GRO could add a lot of value
> anyway.

LRO is extremely discouraged, because it has to be disabled
when any form of forwarding or bridging is enabled.  LRO is
done such that the input packet stream cannot be reconstituted
on transmit.

GRO on the other hand, allows proper reconstitution of the input
packet stream so it can be enabled unconditionally.

We are encouraging hardware manufacturers to tweak their receive
batching offload such that it matches the rules imposed by GRO
which allow proper reconsitution on transmit.

The fact is the code patch is there and it is going to be enabled all
the time, so we have to cope with it.


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-04-23 20:27 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-22 11:21 [PATCH] NIU support for skb->rxhash David Miller
2010-04-22 11:43 ` Eric Dumazet
2010-04-22 21:19   ` David Miller
2010-04-23  8:14     ` David Miller
2010-04-23 15:32       ` Tom Herbert
2010-04-23 20:28         ` David Miller
2010-04-22 16:21 ` Stephen Hemminger
2010-04-22 21:36   ` David Miller
2010-04-22 22:11     ` Stephen Hemminger
2010-04-22 22:24       ` David Miller
2010-04-22 21:37   ` David Miller
2010-04-22 22:53 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.