All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
@ 2015-06-01 12:55 Toshiaki Makita
  2015-06-01 14:12 ` Eric Dumazet
  2015-06-01 23:51 ` David Miller
  0 siblings, 2 replies; 8+ messages in thread
From: Toshiaki Makita @ 2015-06-01 12:55 UTC (permalink / raw)
  To: David S . Miller, Patrick McHardy; +Cc: Toshiaki Makita, Eric Dumazet, netdev

Currently packets with non-hardware-accelerated vlan cannot be handled
by GRO. This causes low performance for 802.1ad and stacked vlan, as their
vlan tags are currently not stripped by hardware.

This patch adds GRO support for non-hardware-accelerated vlan and
improves receive performance of them.

Test Environment:
 vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599)

Result:

- Before

$ netperf -t TCP_STREAM -H 192.168.20.2 -l 60
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    60.00    5233.17

Rx side CPU usage:
  %usr      %sys      %irq     %soft     %idle
  0.27     58.03      0.00     41.70      0.00

- After

$ netperf -t TCP_STREAM -H 192.168.20.2 -l 60
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    60.00    7586.85

Rx side CPU usage:
  %usr      %sys      %irq     %soft     %idle
  0.50     25.83      0.00     59.53     14.14

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
v2:
- Add compare_vlan_header() as per Eric Dumazet.

 include/linux/if_vlan.h | 20 +++++++++++
 net/8021q/vlan.c        | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a40d298..67ce5bd 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -628,4 +628,24 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 	return features;
 }
 
+/**
+ * compare_vlan_header - Compare two vlan headers
+ * @h1: Pointer to vlan header
+ * @h2: Pointer to vlan header
+ *
+ * Compare two vlan headers, returns 0 if equal.
+ *
+ * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits.
+ */
+static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1,
+						const struct vlan_hdr *h2)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	return *(u32 *)h1 ^ *(u32 *)h2;
+#else
+	return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) |
+	       ((__force u32)h1->h_vlan_encapsulated_proto ^
+		(__force u32)h2->h_vlan_encapsulated_proto);
+#endif
+}
 #endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 59555f0..9c4f884 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -618,6 +618,90 @@ out:
 	return err;
 }
 
+static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct vlan_hdr *vhdr;
+	unsigned int hlen, off_vlan;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_vlan = skb_gro_offset(skb);
+	hlen = off_vlan + sizeof(*vhdr);
+	vhdr = skb_gro_header_fast(skb, off_vlan);
+	if (skb_gro_header_hard(skb, hlen)) {
+		vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+		if (unlikely(!vhdr))
+			goto out;
+	}
+
+	type = vhdr->h_vlan_encapsulated_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (!ptype)
+		goto out_unlock;
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		struct vlan_hdr *vhdr2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+		if (compare_vlan_header(vhdr, vhdr2))
+			NAPI_GRO_CB(p)->same_flow = 0;
+	}
+
+	skb_gro_pull(skb, sizeof(*vhdr));
+	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+	__be16 type = vhdr->h_vlan_encapsulated_proto;
+	struct packet_offload *ptype;
+	int err = -ENOENT;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype)
+		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+	rcu_read_unlock();
+	return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+	{
+		.type = cpu_to_be16(ETH_P_8021Q),
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+	{
+		.type = cpu_to_be16(ETH_P_8021AD),
+		.callbacks = {
+			.gro_receive = vlan_gro_receive,
+			.gro_complete = vlan_gro_complete,
+		},
+	},
+};
+
 static int __net_init vlan_init_net(struct net *net)
 {
 	struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -645,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
 static int __init vlan_proto_init(void)
 {
 	int err;
+	unsigned int i;
 
 	pr_info("%s v%s\n", vlan_fullname, vlan_version);
 
@@ -668,6 +753,9 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err5;
 
+	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+		dev_add_offload(&vlan_packet_offloads[i]);
+
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
@@ -685,7 +773,13 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
+	unsigned int i;
+
 	vlan_ioctl_set(NULL);
+
+	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+		dev_remove_offload(&vlan_packet_offloads[i]);
+
 	vlan_netlink_fini();
 
 	unregister_netdevice_notifier(&vlan_notifier_block);
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 12:55 [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan Toshiaki Makita
@ 2015-06-01 14:12 ` Eric Dumazet
  2015-06-01 16:03   ` Toshiaki Makita
  2015-06-01 21:56   ` David Miller
  2015-06-01 23:51 ` David Miller
  1 sibling, 2 replies; 8+ messages in thread
From: Eric Dumazet @ 2015-06-01 14:12 UTC (permalink / raw)
  To: Toshiaki Makita; +Cc: David S . Miller, Patrick McHardy, netdev

On Mon, 2015-06-01 at 21:55 +0900, Toshiaki Makita wrote:

> @@ -668,6 +753,9 @@ static int __init vlan_proto_init(void)
>  	if (err < 0)
>  		goto err5;
>  
> +	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
> +		dev_add_offload(&vlan_packet_offloads[i]);
> +
>  	vlan_ioctl_set(vlan_ioctl_handler);
>  	return 0;

My concern about this is :

This might slow down GRO stack for other traffic, if dev_add_offload()
for vlan offloads is called after 
dev_add_offload(&ip_packet_offload) /
dev_add_offload(&ipv6_packet_offload)


This is because list_add_rcu is used and this inserts in front of the
offload_base list.

void dev_add_offload(struct packet_offload *po)
{
        struct list_head *head = &offload_base;

        spin_lock(&offload_lock);
        list_add_rcu(&po->list, head);
        spin_unlock(&offload_lock);
}

Can we ensure offload_base contains a sensible order of expected types ?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 14:12 ` Eric Dumazet
@ 2015-06-01 16:03   ` Toshiaki Makita
  2015-06-01 16:28     ` Eric Dumazet
  2015-06-01 21:56   ` David Miller
  1 sibling, 1 reply; 8+ messages in thread
From: Toshiaki Makita @ 2015-06-01 16:03 UTC (permalink / raw)
  To: Eric Dumazet, Toshiaki Makita; +Cc: David S . Miller, Patrick McHardy, netdev

On 15/06/01 (月) 23:12, Eric Dumazet wrote:
> On Mon, 2015-06-01 at 21:55 +0900, Toshiaki Makita wrote:
>
>> @@ -668,6 +753,9 @@ static int __init vlan_proto_init(void)
>>   	if (err < 0)
>>   		goto err5;
>>
>> +	for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
>> +		dev_add_offload(&vlan_packet_offloads[i]);
>> +
>>   	vlan_ioctl_set(vlan_ioctl_handler);
>>   	return 0;
>
> My concern about this is :
>
> This might slow down GRO stack for other traffic, if dev_add_offload()
> for vlan offloads is called after
> dev_add_offload(&ip_packet_offload) /
> dev_add_offload(&ipv6_packet_offload)

I didn't have that concern because there are already other similar 
offloads (eth, mpls_uc, mpls_mc). But indeed, they and this could slow 
down GRO stack.

>
>
> This is because list_add_rcu is used and this inserts in front of the
> offload_base list.
>
> void dev_add_offload(struct packet_offload *po)
> {
>          struct list_head *head = &offload_base;
>
>          spin_lock(&offload_lock);
>          list_add_rcu(&po->list, head);
>          spin_unlock(&offload_lock);
> }
>
> Can we ensure offload_base contains a sensible order of expected types ?

Add priority to packet_offload like nf_hook_ops?
Or have dev_add_offload() prioritize IP > IPV6 > others?

Toshiaki Makita

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 16:03   ` Toshiaki Makita
@ 2015-06-01 16:28     ` Eric Dumazet
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2015-06-01 16:28 UTC (permalink / raw)
  To: Toshiaki Makita
  Cc: Toshiaki Makita, David S . Miller, Patrick McHardy, netdev

On Tue, 2015-06-02 at 01:03 +0900, Toshiaki Makita wrote:

> I didn't have that concern because there are already other similar 
> offloads (eth, mpls_uc, mpls_mc). But indeed, they and this could slow 
> down GRO stack.

Right, but these mpls offloads are not installed on my kernels ;)

And I already checked that eth was installed before IPv4/IPv6,
although it might be pure luck.



> Add priority to packet_offload like nf_hook_ops?
> Or have dev_add_offload() prioritize IP > IPV6 > others?


You also could use a CONFIG_NET_VLAN_GRO module, so that only
users stuck with non accelerated vlan can load.

But yes, we might take care of this problem at some point.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 14:12 ` Eric Dumazet
  2015-06-01 16:03   ` Toshiaki Makita
@ 2015-06-01 21:56   ` David Miller
  2015-06-03  1:09     ` Simon Horman
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2015-06-01 21:56 UTC (permalink / raw)
  To: eric.dumazet; +Cc: makita.toshiaki, kaber, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 01 Jun 2015 07:12:37 -0700

> Can we ensure offload_base contains a sensible order of expected
> types ?

This seemed easy enough to kill, so I pushed the following into net-next:

====================
[PATCH] net: Add priority to packet_offload objects.

When we scan a packet for GRO processing, we want to see the most
common packet types in the front of the offload_base list.

So add a priority field so we can handle this properly.

IPv4/IPv6 get the highest priority with the implicit zero priority
field.

Next comes ethernet with a priority of 10, and then we have the MPLS
types with a priority of 15.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Suggested-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 8 ++++++--
 net/ethernet/eth.c        | 1 +
 net/mpls/mpls_gso.c       | 2 ++
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 51f8d2f..6f5f71f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1997,6 +1997,7 @@ struct offload_callbacks {
 
 struct packet_offload {
 	__be16			 type;	/* This is really htons(ether_type). */
+	u16			 priority;
 	struct offload_callbacks callbacks;
 	struct list_head	 list;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 594163d..0602e91 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -469,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack);
  */
 void dev_add_offload(struct packet_offload *po)
 {
-	struct list_head *head = &offload_base;
+	struct packet_offload *elem;
 
 	spin_lock(&offload_lock);
-	list_add_rcu(&po->list, head);
+	list_for_each_entry(elem, &offload_base, list) {
+		if (po->priority < elem->priority)
+			break;
+	}
+	list_add_rcu(&po->list, elem->list.prev);
 	spin_unlock(&offload_lock);
 }
 EXPORT_SYMBOL(dev_add_offload);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index c3325bd..7d0e239 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -470,6 +470,7 @@ EXPORT_SYMBOL(eth_gro_complete);
 
 static struct packet_offload eth_packet_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_TEB),
+	.priority = 10,
 	.callbacks = {
 		.gro_receive = eth_gro_receive,
 		.gro_complete = eth_gro_complete,
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 809df53..0183b32 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -62,6 +62,7 @@ out:
 
 static struct packet_offload mpls_mc_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_MPLS_MC),
+	.priority = 15,
 	.callbacks = {
 		.gso_segment    =	mpls_gso_segment,
 	},
@@ -69,6 +70,7 @@ static struct packet_offload mpls_mc_offload __read_mostly = {
 
 static struct packet_offload mpls_uc_offload __read_mostly = {
 	.type = cpu_to_be16(ETH_P_MPLS_UC),
+	.priority = 15,
 	.callbacks = {
 		.gso_segment    =	mpls_gso_segment,
 	},
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 12:55 [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan Toshiaki Makita
  2015-06-01 14:12 ` Eric Dumazet
@ 2015-06-01 23:51 ` David Miller
  2015-06-02  1:24   ` Toshiaki Makita
  1 sibling, 1 reply; 8+ messages in thread
From: David Miller @ 2015-06-01 23:51 UTC (permalink / raw)
  To: makita.toshiaki; +Cc: kaber, eric.dumazet, netdev

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon,  1 Jun 2015 21:55:06 +0900

> Currently packets with non-hardware-accelerated vlan cannot be handled
> by GRO. This causes low performance for 802.1ad and stacked vlan, as their
> vlan tags are currently not stripped by hardware.
> 
> This patch adds GRO support for non-hardware-accelerated vlan and
> improves receive performance of them.
> 
> Test Environment:
>  vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599)
> 
> Result:
 ...
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

So I applied this, but gave the new offloads a priority of 10.

Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 23:51 ` David Miller
@ 2015-06-02  1:24   ` Toshiaki Makita
  0 siblings, 0 replies; 8+ messages in thread
From: Toshiaki Makita @ 2015-06-02  1:24 UTC (permalink / raw)
  To: David Miller; +Cc: kaber, eric.dumazet, netdev

On 2015/06/02 8:51, David Miller wrote:
> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Date: Mon,  1 Jun 2015 21:55:06 +0900
> 
>> Currently packets with non-hardware-accelerated vlan cannot be handled
>> by GRO. This causes low performance for 802.1ad and stacked vlan, as their
>> vlan tags are currently not stripped by hardware.
>>
>> This patch adds GRO support for non-hardware-accelerated vlan and
>> improves receive performance of them.
>>
>> Test Environment:
>>  vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599)
>>
>> Result:
>  ...
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> 
> So I applied this, but gave the new offloads a priority of 10.

Thank you for handling the priority.

Thanks,
Toshiaki Makita

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan
  2015-06-01 21:56   ` David Miller
@ 2015-06-03  1:09     ` Simon Horman
  0 siblings, 0 replies; 8+ messages in thread
From: Simon Horman @ 2015-06-03  1:09 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, makita.toshiaki, kaber, netdev

On Mon, Jun 01, 2015 at 02:56:25PM -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 01 Jun 2015 07:12:37 -0700
> 
> > Can we ensure offload_base contains a sensible order of expected
> > types ?
> 
> This seemed easy enough to kill, so I pushed the following into net-next:
> 
> ====================
> [PATCH] net: Add priority to packet_offload objects.
> 
> When we scan a packet for GRO processing, we want to see the most
> common packet types in the front of the offload_base list.
> 
> So add a priority field so we can handle this properly.
> 
> IPv4/IPv6 get the highest priority with the implicit zero priority
> field.
> 
> Next comes ethernet with a priority of 10, and then we have the MPLS
> types with a priority of 15.

FWIW I have no objections to the priority assigned to MPLS.

> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Suggested-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-06-03  1:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-01 12:55 [PATCH v2 net-next] vlan: Add GRO support for non hardware accelerated vlan Toshiaki Makita
2015-06-01 14:12 ` Eric Dumazet
2015-06-01 16:03   ` Toshiaki Makita
2015-06-01 16:28     ` Eric Dumazet
2015-06-01 21:56   ` David Miller
2015-06-03  1:09     ` Simon Horman
2015-06-01 23:51 ` David Miller
2015-06-02  1:24   ` Toshiaki Makita

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.