All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
@ 2011-04-08  5:48 Jiri Pirko
  2011-04-12 21:16 ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-04-08  5:48 UTC (permalink / raw)
  To: netdev
  Cc: davem, shemminger, kaber, fubar, eric.dumazet, nicolas.2p.debian,
	andy, xiaosuo, jesse, ebiederm

Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is
enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into
vlan code in __netif_receive_skb - vlan_hwaccel_do_receive.

For non-rx-vlan-hw-accel however, tagged skb goes thru whole
__netif_receive_skb, it's untagged in ptype_base hander and reinjected

This incosistency is fixed by this patch. Vlan untagging happens early in
__netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers)
see the skb like it was untagged by hw.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>

v1->v2:
	remove "inline" from vlan_core.c functions

---
 include/linux/if_vlan.h |   10 ++-
 net/8021q/vlan.c        |    8 --
 net/8021q/vlan.h        |    2 -
 net/8021q/vlan_core.c   |   85 +++++++++++++++++++++++-
 net/8021q/vlan_dev.c    |  173 -----------------------------------------------
 net/core/dev.c          |    8 ++-
 6 files changed, 99 insertions(+), 187 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 635e1fa..998b299 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -132,7 +132,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     u16 vlan_tci, int polling);
-extern bool vlan_hwaccel_do_receive(struct sk_buff **skb);
+extern bool vlan_do_receive(struct sk_buff **skb);
+extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 extern gro_result_t
 vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		 unsigned int vlan_tci, struct sk_buff *skb);
@@ -166,13 +167,18 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 	return NET_XMIT_SUCCESS;
 }
 
-static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb)
+static inline bool vlan_do_receive(struct sk_buff **skb)
 {
 	if ((*skb)->vlan_tci & VLAN_VID_MASK)
 		(*skb)->pkt_type = PACKET_OTHERHOST;
 	return false;
 }
 
+inline struct sk_buff *vlan_untag(struct sk_buff *skb)
+{
+	return skb;
+}
+
 static inline gro_result_t
 vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		 unsigned int vlan_tci, struct sk_buff *skb)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e47600b..14ef5ef 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -49,11 +49,6 @@ const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static struct packet_type vlan_packet_type __read_mostly = {
-	.type = cpu_to_be16(ETH_P_8021Q),
-	.func = vlan_skb_recv, /* VLAN receive method */
-};
-
 /* End of global variables definitions. */
 
 static void vlan_group_free(struct vlan_group *grp)
@@ -684,7 +679,6 @@ static int __init vlan_proto_init(void)
 	if (err < 0)
 		goto err4;
 
-	dev_add_pack(&vlan_packet_type);
 	vlan_ioctl_set(vlan_ioctl_handler);
 	return 0;
 
@@ -705,8 +699,6 @@ static void __exit vlan_cleanup_module(void)
 
 	unregister_netdevice_notifier(&vlan_notifier_block);
 
-	dev_remove_pack(&vlan_packet_type);
-
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5687c9b..c3408de 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -75,8 +75,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 }
 
 /* found in vlan_dev.c */
-int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type *ptype, struct net_device *orig_dev);
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index ce8e3ab..41495dc 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,7 +4,7 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
+bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
@@ -88,3 +88,86 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	return napi_gro_frags(napi);
 }
 EXPORT_SYMBOL(vlan_gro_frags);
+
+static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+{
+	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+		if (skb_cow(skb, skb_headroom(skb)) < 0)
+			skb = NULL;
+		if (skb) {
+			/* Lifted from Gleb's VLAN code... */
+			memmove(skb->data - ETH_HLEN,
+				skb->data - VLAN_ETH_HLEN, 12);
+			skb->mac_header += VLAN_HLEN;
+		}
+	}
+	return skb;
+}
+
+static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *) rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
+
+struct sk_buff *vlan_untag(struct sk_buff *skb)
+{
+	struct vlan_hdr *vhdr;
+	u16 vlan_tci;
+
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		/* vlan_tci is already set-up so leave this for another time */
+		return skb;
+	}
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto err_free;
+
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+		goto err_free;
+
+	vhdr = (struct vlan_hdr *) skb->data;
+	vlan_tci = ntohs(vhdr->h_vlan_TCI);
+	__vlan_hwaccel_put_tag(skb, vlan_tci);
+
+	skb_pull_rcsum(skb, VLAN_HLEN);
+	vlan_set_encap_proto(skb, vhdr);
+
+	skb = vlan_check_reorder_header(skb);
+	if (unlikely(!skb))
+		goto err_free;
+
+	return skb;
+
+err_free:
+	kfree_skb(skb);
+	return NULL;
+}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b84a46b..d174c31 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -65,179 +65,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
 	return 0;
 }
 
-static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
-{
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
-	}
-
-	return skb;
-}
-
-static inline void vlan_set_encap_proto(struct sk_buff *skb,
-		struct vlan_hdr *vhdr)
-{
-	__be16 proto;
-	unsigned char *rawp;
-
-	/*
-	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
-	 * three protocols care about.
-	 */
-
-	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
-		skb->protocol = proto;
-		return;
-	}
-
-	rawp = skb->data;
-	if (*(unsigned short *)rawp == 0xFFFF)
-		/*
-		 * This is a magic hack to spot IPX packets. Older Novell
-		 * breaks the protocol design and runs IPX over 802.3 without
-		 * an 802.2 LLC layer. We look for FFFF which isn't a used
-		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
-		 * but does for the rest.
-		 */
-		skb->protocol = htons(ETH_P_802_3);
-	else
-		/*
-		 * Real 802.2 LLC
-		 */
-		skb->protocol = htons(ETH_P_802_2);
-}
-
-/*
- *	Determine the packet's protocol ID. The rule here is that we
- *	assume 802.3 if the type field is short enough to be a length.
- *	This is normal practice and works for any 'now in use' protocol.
- *
- *  Also, at this point we assume that we ARE dealing exclusively with
- *  VLAN packets, or packets that should be made into VLAN packets based
- *  on a default VLAN ID.
- *
- *  NOTE:  Should be similar to ethernet/eth.c.
- *
- *  SANITY NOTE:  This method is called when a packet is moving up the stack
- *                towards userland.  To get here, it would have already passed
- *                through the ethernet/eth.c eth_type_trans() method.
- *  SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
- *                 stored UNALIGNED in the memory.  RISC systems don't like
- *                 such cases very much...
- *  SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
- *  		    aligned, so there doesn't need to be any of the unaligned
- *  		    stuff.  It has been commented out now...  --Ben
- *
- */
-int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type *ptype, struct net_device *orig_dev)
-{
-	struct vlan_hdr *vhdr;
-	struct vlan_pcpu_stats *rx_stats;
-	struct net_device *vlan_dev;
-	u16 vlan_id;
-	u16 vlan_tci;
-
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (skb == NULL)
-		goto err_free;
-
-	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
-		goto err_free;
-
-	vhdr = (struct vlan_hdr *)skb->data;
-	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-
-	rcu_read_lock();
-	vlan_dev = vlan_find_dev(dev, vlan_id);
-
-	/* If the VLAN device is defined, we use it.
-	 * If not, and the VID is 0, it is a 802.1p packet (not
-	 * really a VLAN), so we will just netif_rx it later to the
-	 * original interface, but with the skb->proto set to the
-	 * wrapped proto: we do nothing here.
-	 */
-
-	if (!vlan_dev) {
-		if (vlan_id) {
-			pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
-				 __func__, vlan_id, dev->name);
-			goto err_unlock;
-		}
-		rx_stats = NULL;
-	} else {
-		skb->dev = vlan_dev;
-
-		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
-
-		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;
-		rx_stats->rx_bytes += skb->len;
-
-		skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
-
-		pr_debug("%s: priority: %u for TCI: %hu\n",
-			 __func__, skb->priority, vlan_tci);
-
-		switch (skb->pkt_type) {
-		case PACKET_BROADCAST:
-			/* Yeah, stats collect these together.. */
-			/* stats->broadcast ++; // no such counter :-( */
-			break;
-
-		case PACKET_MULTICAST:
-			rx_stats->rx_multicast++;
-			break;
-
-		case PACKET_OTHERHOST:
-			/* Our lower layer thinks this is not local, let's make
-			 * sure.
-			 * This allows the VLAN to have a different MAC than the
-			 * underlying device, and still route correctly.
-			 */
-			if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-						skb->dev->dev_addr))
-				skb->pkt_type = PACKET_HOST;
-			break;
-		default:
-			break;
-		}
-		u64_stats_update_end(&rx_stats->syncp);
-	}
-
-	skb_pull_rcsum(skb, VLAN_HLEN);
-	vlan_set_encap_proto(skb, vhdr);
-
-	if (vlan_dev) {
-		skb = vlan_check_reorder_header(skb);
-		if (!skb) {
-			rx_stats->rx_errors++;
-			goto err_unlock;
-		}
-	}
-
-	netif_rx(skb);
-
-	rcu_read_unlock();
-	return NET_RX_SUCCESS;
-
-err_unlock:
-	rcu_read_unlock();
-err_free:
-	atomic_long_inc(&dev->rx_dropped);
-	kfree_skb(skb);
-	return NET_RX_DROP;
-}
-
 static inline u16
 vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 5d0b4f6..52aed0a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3130,6 +3130,12 @@ another_round:
 
 	__this_cpu_inc(softnet_data.processed);
 
+	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+		skb = vlan_untag(skb);
+		if (unlikely(!skb))
+			goto out;
+	}
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3177,7 +3183,7 @@ ncls:
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_hwaccel_do_receive(&skb)) {
+		if (vlan_do_receive(&skb)) {
 			ret = __netif_receive_skb(skb);
 			goto out;
 		} else if (unlikely(!skb))
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-04-08  5:48 [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Jiri Pirko
@ 2011-04-12 21:16 ` David Miller
  2011-05-21  1:11   ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: David Miller @ 2011-04-12 21:16 UTC (permalink / raw)
  To: jpirko
  Cc: netdev, shemminger, kaber, fubar, eric.dumazet,
	nicolas.2p.debian, andy, xiaosuo, jesse, ebiederm

From: Jiri Pirko <jpirko@redhat.com>
Date: Fri,  8 Apr 2011 07:48:33 +0200

> Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is
> enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into
> vlan code in __netif_receive_skb - vlan_hwaccel_do_receive.
> 
> For non-rx-vlan-hw-accel however, tagged skb goes thru whole
> __netif_receive_skb, it's untagged in ptype_base hander and reinjected
> 
> This incosistency is fixed by this patch. Vlan untagging happens early in
> __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers)
> see the skb like it was untagged by hw.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> 
> v1->v2:
> 	remove "inline" from vlan_core.c functions

Ok, I've applied this, let's see what happens :-)

Thanks!

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-04-12 21:16 ` David Miller
@ 2011-05-21  1:11   ` Changli Gao
  2011-05-21  7:29     ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-05-21  1:11 UTC (permalink / raw)
  To: David Miller
  Cc: jpirko, netdev, shemminger, kaber, fubar, eric.dumazet,
	nicolas.2p.debian, andy, jesse, ebiederm

On Wed, Apr 13, 2011 at 5:16 AM, David Miller <davem@davemloft.net> wrote:
> From: Jiri Pirko <jpirko@redhat.com>
> Date: Fri,  8 Apr 2011 07:48:33 +0200
>
>> Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is
>> enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into
>> vlan code in __netif_receive_skb - vlan_hwaccel_do_receive.
>>
>> For non-rx-vlan-hw-accel however, tagged skb goes thru whole
>> __netif_receive_skb, it's untagged in ptype_base hander and reinjected
>>
>> This incosistency is fixed by this patch. Vlan untagging happens early in
>> __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers)
>> see the skb like it was untagged by hw.
>>
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>
>> v1->v2:
>>       remove "inline" from vlan_core.c functions
>
> Ok, I've applied this, let's see what happens :-)
>

I think we should revert it.

File: net/8021q/vlan_core.c:

161         skb_pull_rcsum(skb, VLAN_HLEN);

skb->data and skb->len are updated, but network_header and
transport_header are left unchanged. This will break the assumption in
net_sched.

  for example:
  file: cls_u32.c
  104         unsigned int off = skb_network_offset(skb);
  After this patch, skb_network_offset may be negative.

162         vlan_set_encap_proto(skb, vhdr);
163
164         skb = vlan_check_reorder_header(skb);
vlan_check_reorder_header assume skb->dev is a vlan_dev. Even though
the correct dev is assigned temporarily, we should not reorder the
header here as HW accelerated vlan RX does, as this may breaks the
bridging comes later.

165         if (unlikely(!skb))
166                 goto err_free;

The hardware accelerated vlan RX doesn't always do the "right" things
as it strips the vlan header, so we should not emulate it in software
all the time.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21  1:11   ` Changli Gao
@ 2011-05-21  7:29     ` Jiri Pirko
  2011-05-21 10:43       ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-05-21  7:29 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	nicolas.2p.debian, andy, jesse, ebiederm

Sat, May 21, 2011 at 03:11:05AM CEST, xiaosuo@gmail.com wrote:
>On Wed, Apr 13, 2011 at 5:16 AM, David Miller <davem@davemloft.net> wrote:
>> From: Jiri Pirko <jpirko@redhat.com>
>> Date: Fri,  8 Apr 2011 07:48:33 +0200
>>
>>> Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is
>>> enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into
>>> vlan code in __netif_receive_skb - vlan_hwaccel_do_receive.
>>>
>>> For non-rx-vlan-hw-accel however, tagged skb goes thru whole
>>> __netif_receive_skb, it's untagged in ptype_base hander and reinjected
>>>
>>> This incosistency is fixed by this patch. Vlan untagging happens early in
>>> __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers)
>>> see the skb like it was untagged by hw.
>>>
>>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>>
>>> v1->v2:
>>>       remove "inline" from vlan_core.c functions
>>
>> Ok, I've applied this, let's see what happens :-)
>>
>
>I think we should revert it.
>
>File: net/8021q/vlan_core.c:
>
>161         skb_pull_rcsum(skb, VLAN_HLEN);
>
>skb->data and skb->len are updated, but network_header and
>transport_header are left unchanged. This will break the assumption in
>net_sched.
>
>  for example:
>  file: cls_u32.c
>  104         unsigned int off = skb_network_offset(skb);
>  After this patch, skb_network_offset may be negative.
>
>162         vlan_set_encap_proto(skb, vhdr);
>163
>164         skb = vlan_check_reorder_header(skb);
>vlan_check_reorder_header assume skb->dev is a vlan_dev. Even though
>the correct dev is assigned temporarily, we should not reorder the
>header here as HW accelerated vlan RX does, as this may breaks the
>bridging comes later.
>
>165         if (unlikely(!skb))
>166                 goto err_free;
>
>The hardware accelerated vlan RX doesn't always do the "right" things
>as it strips the vlan header, so we should not emulate it in software
>all the time.

I do not see a reason why to not emulate that. To make paths as much
similar as they can be, that is the point of this patch.

I think it would be better to fix an issue you are pointing at
rather that revert this.

Jirka

>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21  7:29     ` Jiri Pirko
@ 2011-05-21 10:43       ` Changli Gao
  2011-05-21 13:17         ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-05-21 10:43 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	nicolas.2p.debian, andy, jesse, ebiederm

On Sat, May 21, 2011 at 3:29 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>
> I do not see a reason why to not emulate that. To make paths as much
> similar as they can be, that is the point of this patch.
>
> I think it would be better to fix an issue you are pointing at
> rather that revert this.
>

In my opinion, the hardware accelerated VLAN RX is just a special case
of the non hardware accelerated VLAN RX with header reordering. For
promiscuous NICs and bridges, hw-accel-vlan-rx is just disabled.

I have tried to fix all the issues, but failed in a clean way. Please try.

Thanks.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21 10:43       ` Changli Gao
@ 2011-05-21 13:17         ` Nicolas de Pesloüan
  2011-05-21 17:54           ` Jesse Gross
  0 siblings, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-21 13:17 UTC (permalink / raw)
  To: Changli Gao, Jiri Pirko
  Cc: David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	andy, jesse, ebiederm

Le 21/05/2011 12:43, Changli Gao a écrit :
> On Sat, May 21, 2011 at 3:29 PM, Jiri Pirko<jpirko@redhat.com>  wrote:
>>
>> I do not see a reason why to not emulate that. To make paths as much
>> similar as they can be, that is the point of this patch.
>>
>> I think it would be better to fix an issue you are pointing at
>> rather that revert this.
>>
>
> In my opinion, the hardware accelerated VLAN RX is just a special case
> of the non hardware accelerated VLAN RX with header reordering. For
> promiscuous NICs and bridges, hw-accel-vlan-rx is just disabled.

I strongly agree with that.

The fact that a skb holds a VLAN tag is not a good enough reason to always remove this tag before 
giving the skb to protocol handlers.

If the user ask for VLAN tag removal, we should remove the tag, possibly using hw-accel untagging if 
available else software untagging. And if the user doesn't ask for tag removal, we should not untag.

In other words, if the user doesn't setup any vlan interface on top of another interface, there is 
no reason to untag the skb : both hw-accel untagging and software untagging should be disabled.

Also, the skb should be delivered untagged or tagged to protocol handlers, depending on the 
particular device the protocol handlers registered at. The same skb might need to be delivered 
tagged to a ptype_all handler registered at eth0 and untagged to a ptype_base handler registered at 
eth0.100.

rx_handler still sounds the right place to do software untagging, because software untagging is a 
per-device process. A vlan_untagging rx_handler should be installed on the devices that have vlan 
child device and (that lack hw-accel or where hw-accel is disabled). This would also cause 
__netif_receive_skb() not to hold any vlan specific code, which is cleaner.

I perfectly understand that this might require several rx_handlers per device, for advanced setup, 
but I think for long that several rx_handlers is a powerful feature we need.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21 13:17         ` Nicolas de Pesloüan
@ 2011-05-21 17:54           ` Jesse Gross
  2011-05-21 22:15             ` Stephen Hemminger
  2011-05-22  2:59             ` Nicolas de Pesloüan
  0 siblings, 2 replies; 92+ messages in thread
From: Jesse Gross @ 2011-05-21 17:54 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Changli Gao, Jiri Pirko, David Miller, netdev, shemminger, kaber,
	fubar, eric.dumazet, andy, ebiederm

On Sat, May 21, 2011 at 6:17 AM, Nicolas de Pesloüan
<nicolas.2p.debian@gmail.com> wrote:
> Le 21/05/2011 12:43, Changli Gao a écrit :
>>
>> On Sat, May 21, 2011 at 3:29 PM, Jiri Pirko<jpirko@redhat.com>  wrote:
>>>
>>> I do not see a reason why to not emulate that. To make paths as much
>>> similar as they can be, that is the point of this patch.
>>>
>>> I think it would be better to fix an issue you are pointing at
>>> rather that revert this.
>>>
>>
>> In my opinion, the hardware accelerated VLAN RX is just a special case
>> of the non hardware accelerated VLAN RX with header reordering. For
>> promiscuous NICs and bridges, hw-accel-vlan-rx is just disabled.
>
> I strongly agree with that.
>
> The fact that a skb holds a VLAN tag is not a good enough reason to always
> remove this tag before giving the skb to protocol handlers.
>
> If the user ask for VLAN tag removal, we should remove the tag, possibly
> using hw-accel untagging if available else software untagging. And if the
> user doesn't ask for tag removal, we should not untag.
>
> In other words, if the user doesn't setup any vlan interface on top of
> another interface, there is no reason to untag the skb : both hw-accel
> untagging and software untagging should be disabled.

The problem is that for most hardware vlan stripping is actually the
common case, not the exception.  When you try to disable it frequently
there are hidden restrictions that cause problems.  A few examples:
* Some NICs can't disable stripping at all.
* Some NICs can only do tag insertion if stripping is configured on receive.
* Some NICs can only do hardware offloads (checksum, TSO) if tag
insertion is used on transmit.

So if you are using vlans then acceleration is pretty much a fact of
life and the best possible way we can deal with it is to make the
accelerated and non-accelerated cases behave as similarly as possible.

Before we were trying to dynamically enable/disable vlan acceleration
based on whether a vlan group was configured and that worked fine for
vlan devices because acceleration was enabled for it.  However, it
caused an endless series of problems for other devices (such as
bridging while trunking vlans) due to lost tags, driver bugs, and the
restrictions above.  Some of these can be fixed with driver changes
but the fact is that dynamically changing behavior just leads to
problems for the less common cases that are supposedly being fixed.
It's much better to do the same thing all the time.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21 17:54           ` Jesse Gross
@ 2011-05-21 22:15             ` Stephen Hemminger
  2011-05-22  2:59             ` Nicolas de Pesloüan
  1 sibling, 0 replies; 92+ messages in thread
From: Stephen Hemminger @ 2011-05-21 22:15 UTC (permalink / raw)
  To: Jesse Gross
  Cc: Nicolas de Pesloüan, Changli Gao, Jiri Pirko, David Miller,
	netdev, kaber, fubar, eric.dumazet, andy, ebiederm

On Sat, 21 May 2011 10:54:39 -0700
Jesse Gross <jesse@nicira.com> wrote:

> On Sat, May 21, 2011 at 6:17 AM, Nicolas de Pesloüan
> <nicolas.2p.debian@gmail.com> wrote:
> > Le 21/05/2011 12:43, Changli Gao a écrit :
> >>
> >> On Sat, May 21, 2011 at 3:29 PM, Jiri Pirko<jpirko@redhat.com>  wrote:
> >>>
> >>> I do not see a reason why to not emulate that. To make paths as much
> >>> similar as they can be, that is the point of this patch.
> >>>
> >>> I think it would be better to fix an issue you are pointing at
> >>> rather that revert this.
> >>>
> >>
> >> In my opinion, the hardware accelerated VLAN RX is just a special case
> >> of the non hardware accelerated VLAN RX with header reordering. For
> >> promiscuous NICs and bridges, hw-accel-vlan-rx is just disabled.
> >
> > I strongly agree with that.
> >
> > The fact that a skb holds a VLAN tag is not a good enough reason to always
> > remove this tag before giving the skb to protocol handlers.
> >
> > If the user ask for VLAN tag removal, we should remove the tag, possibly
> > using hw-accel untagging if available else software untagging. And if the
> > user doesn't ask for tag removal, we should not untag.
> >
> > In other words, if the user doesn't setup any vlan interface on top of
> > another interface, there is no reason to untag the skb : both hw-accel
> > untagging and software untagging should be disabled.
> 
> The problem is that for most hardware vlan stripping is actually the
> common case, not the exception.  When you try to disable it frequently
> there are hidden restrictions that cause problems.  A few examples:
> * Some NICs can't disable stripping at all.
> * Some NICs can only do tag insertion if stripping is configured on receive.
> * Some NICs can only do hardware offloads (checksum, TSO) if tag
> insertion is used on transmit.
> 
> So if you are using vlans then acceleration is pretty much a fact of
> life and the best possible way we can deal with it is to make the
> accelerated and non-accelerated cases behave as similarly as possible.
> 
> Before we were trying to dynamically enable/disable vlan acceleration
> based on whether a vlan group was configured and that worked fine for
> vlan devices because acceleration was enabled for it.  However, it
> caused an endless series of problems for other devices (such as
> bridging while trunking vlans) due to lost tags, driver bugs, and the
> restrictions above.  Some of these can be fixed with driver changes
> but the fact is that dynamically changing behavior just leads to
> problems for the less common cases that are supposedly being fixed.
> It's much better to do the same thing all the time.
> 

The old code was also fundamentally broken if doing any kind of
Qos because the TC filter would have to know whether skb had extra
overhead of VLAN tag at the start. This meant the TC filter setup
had to be different depending on whether the hardware supported HW
acceleration or not.
-- 

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-21 17:54           ` Jesse Gross
  2011-05-21 22:15             ` Stephen Hemminger
@ 2011-05-22  2:59             ` Nicolas de Pesloüan
  2011-05-22  6:29               ` Jiri Pirko
  1 sibling, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-22  2:59 UTC (permalink / raw)
  To: Jesse Gross
  Cc: Changli Gao, Jiri Pirko, David Miller, netdev, shemminger, kaber,
	fubar, eric.dumazet, andy, ebiederm

Le 21/05/2011 19:54, Jesse Gross a écrit :
> On Sat, May 21, 2011 at 6:17 AM, Nicolas de Pesloüan
> <nicolas.2p.debian@gmail.com>  wrote:
>> Le 21/05/2011 12:43, Changli Gao a écrit :
>>>
>>> On Sat, May 21, 2011 at 3:29 PM, Jiri Pirko<jpirko@redhat.com>    wrote:
>>>>
>>>> I do not see a reason why to not emulate that. To make paths as much
>>>> similar as they can be, that is the point of this patch.
>>>>
>>>> I think it would be better to fix an issue you are pointing at
>>>> rather that revert this.
>>>>
>>>
>>> In my opinion, the hardware accelerated VLAN RX is just a special case
>>> of the non hardware accelerated VLAN RX with header reordering. For
>>> promiscuous NICs and bridges, hw-accel-vlan-rx is just disabled.
>>
>> I strongly agree with that.
>>
>> The fact that a skb holds a VLAN tag is not a good enough reason to always
>> remove this tag before giving the skb to protocol handlers.
>>
>> If the user ask for VLAN tag removal, we should remove the tag, possibly
>> using hw-accel untagging if available else software untagging. And if the
>> user doesn't ask for tag removal, we should not untag.
>>
>> In other words, if the user doesn't setup any vlan interface on top of
>> another interface, there is no reason to untag the skb : both hw-accel
>> untagging and software untagging should be disabled.
>
> The problem is that for most hardware vlan stripping is actually the
> common case, not the exception.  When you try to disable it frequently
> there are hidden restrictions that cause problems.  A few examples:
> * Some NICs can't disable stripping at all.
> * Some NICs can only do tag insertion if stripping is configured on receive.
> * Some NICs can only do hardware offloads (checksum, TSO) if tag
> insertion is used on transmit.
>
> So if you are using vlans then acceleration is pretty much a fact of
> life and the best possible way we can deal with it is to make the
> accelerated and non-accelerated cases behave as similarly as possible.
>
> Before we were trying to dynamically enable/disable vlan acceleration
> based on whether a vlan group was configured and that worked fine for
> vlan devices because acceleration was enabled for it.  However, it
> caused an endless series of problems for other devices (such as
> bridging while trunking vlans) due to lost tags, driver bugs, and the
> restrictions above.  Some of these can be fixed with driver changes
> but the fact is that dynamically changing behavior just leads to
> problems for the less common cases that are supposedly being fixed.
> It's much better to do the same thing all the time.

Thanks for clarifying.

So, because many limited/buggy hardware exist, we must mimic the behavior in software. 'Sounds good 
to me.

And because some setups may still require the skb not to be untagged, may be we need the ability to 
re-tag the skb in some situations... When a protocol handler or rx_handler is explicitly registered 
on a net_device which expect to receive tagged skb, we should deliver tagged skb to it... Arguably, 
this may sound incredible for the general case, but may be required for not-so-special cases like 
bridge or protocol analyzer.

Of course, I don't say we should always re-tag: if no protocol handler nor rx_handler were 
registered on the parent interface, we don't need the extra work of re-tagging.

What I say is that it shouldn't be the job of protocol handlers or rx_handlers that expect the skb 
to be tagged to fix the improper untagging. A generic feature should do it when necessary.

And all this being said, it doesn't mean that we should pollute __netif_receive_skb with special 
code for vlan handling.

May be, as suggested by Eric W. Biederman in the V1 thread for this patch, software untagging for 
the first level of header should happen before __netif_receive_skb if we only try to mimic hardware 
behavior.

And possible later untagging (due to vlan nesting) should be done generically inside 
__netif_receive_skb, using rx_handler when appropriate. This would cleanup the general case where no 
vlan is involved at all.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  2:59             ` Nicolas de Pesloüan
@ 2011-05-22  6:29               ` Jiri Pirko
  2011-05-22  6:34                 ` Eric W. Biederman
  2011-05-22  8:38                 ` [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Changli Gao
  0 siblings, 2 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-05-22  6:29 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Jesse Gross, Changli Gao, David Miller, netdev, shemminger,
	kaber, fubar, eric.dumazet, andy, ebiederm

Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:

<snip>
>
>And because some setups may still require the skb not to be untagged,
>may be we need the ability to re-tag the skb in some situations...
>When a protocol handler or rx_handler is explicitly registered on a
>net_device which expect to receive tagged skb, we should deliver
>tagged skb to it... Arguably, this may sound incredible for the
>general case, but may be required for not-so-special cases like
>bridge or protocol analyzer.

Wait, what setups/code require the skb not to be untagged? If there's
such, it should be fixed.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  6:29               ` Jiri Pirko
@ 2011-05-22  6:34                 ` Eric W. Biederman
  2011-05-22  8:34                   ` Nicolas de Pesloüan
  2011-05-22 16:11                   ` Jesse Gross
  2011-05-22  8:38                 ` [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Changli Gao
  1 sibling, 2 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22  6:34 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Nicolas de Pesloüan, Jesse Gross, Changli Gao, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

Jiri Pirko <jpirko@redhat.com> writes:

> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>
> <snip>
>>
>>And because some setups may still require the skb not to be untagged,
>>may be we need the ability to re-tag the skb in some situations...
>>When a protocol handler or rx_handler is explicitly registered on a
>>net_device which expect to receive tagged skb, we should deliver
>>tagged skb to it... Arguably, this may sound incredible for the
>>general case, but may be required for not-so-special cases like
>>bridge or protocol analyzer.
>
> Wait, what setups/code require the skb not to be untagged? If there's
> such, it should be fixed.

tcpdump on the non-vlan interface for one.

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  6:34                 ` Eric W. Biederman
@ 2011-05-22  8:34                   ` Nicolas de Pesloüan
  2011-05-22  8:52                     ` Michał Mirosław
  2011-05-22 16:11                   ` Jesse Gross
  1 sibling, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-22  8:34 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Eric W. Biederman, Jesse Gross, Changli Gao, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

Le 22/05/2011 08:34, Eric W. Biederman a écrit :
> Jiri Pirko<jpirko@redhat.com>  writes:
>
>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>
>> <snip>
>>>
>>> And because some setups may still require the skb not to be untagged,
>>> may be we need the ability to re-tag the skb in some situations...
>>> When a protocol handler or rx_handler is explicitly registered on a
>>> net_device which expect to receive tagged skb, we should deliver
>>> tagged skb to it... Arguably, this may sound incredible for the
>>> general case, but may be required for not-so-special cases like
>>> bridge or protocol analyzer.
>>
>> Wait, what setups/code require the skb not to be untagged? If there's
>> such, it should be fixed.
>
> tcpdump on the non-vlan interface for one.

bridge is another. More precisely, there is a difference between the following two setups:

1/ eth0 - eth0.100 - br0 - eth1.200 - eth1

2/ eth0 - br0 - eth1

In case 1, it is normal and desirable for the bridge to see untagged skb.

In case 2, it is desirable for the bridge to see untouched (possibly tagged) skb. If current bridge 
implementation is able to handle skb from which we removed a tag, in this situation, it means that 
bridge currently "fix improper untagging" by itself, by forcing re-tagging on output. I think is 
should not be the job of protocol handlers to fix this. Again, a generic feature should to it when 
necessary.

Think of the following setups:

3/ eth0 - br0 - eth1.200 - eth1.
4/ eth0 - eth0.100 - br0 - eth1

What if one expect this setup to add (3) or remove (4) one level of vlan nesting? This is precisely 
what this setup suggest. How can we instruct the bridge to do so? It is not the bridge 
responsibility to do any vlan processing. bridge is expected to... bridge !

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  6:29               ` Jiri Pirko
  2011-05-22  6:34                 ` Eric W. Biederman
@ 2011-05-22  8:38                 ` Changli Gao
  2011-05-22  9:37                   ` Jiri Pirko
  1 sibling, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-05-22  8:38 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Nicolas de Pesloüan, Jesse Gross, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, ebiederm

On Sun, May 22, 2011 at 2:29 PM, Jiri Pirko <jpirko@redhat.com> wrote:
> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>
> <snip>
>>
>>And because some setups may still require the skb not to be untagged,
>>may be we need the ability to re-tag the skb in some situations...
>>When a protocol handler or rx_handler is explicitly registered on a
>>net_device which expect to receive tagged skb, we should deliver
>>tagged skb to it... Arguably, this may sound incredible for the
>>general case, but may be required for not-so-special cases like
>>bridge or protocol analyzer.
>
> Wait, what setups/code require the skb not to be untagged? If there's
> such, it should be fixed.
>

For a transparent bridge with ports: eth0 and eth1, the vlan tags
need to be preserved.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  8:34                   ` Nicolas de Pesloüan
@ 2011-05-22  8:52                     ` Michał Mirosław
  2011-05-22  9:10                       ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Michał Mirosław @ 2011-05-22  8:52 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Jiri Pirko, Eric W. Biederman, Jesse Gross, Changli Gao,
	David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	andy

2011/5/22 Nicolas de Pesloüan <nicolas.2p.debian@gmail.com>:
> Le 22/05/2011 08:34, Eric W. Biederman a écrit :
>>
>> Jiri Pirko<jpirko@redhat.com>  writes:
>>
>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>>
>>> <snip>
>>>>
>>>> And because some setups may still require the skb not to be untagged,
>>>> may be we need the ability to re-tag the skb in some situations...
>>>> When a protocol handler or rx_handler is explicitly registered on a
>>>> net_device which expect to receive tagged skb, we should deliver
>>>> tagged skb to it... Arguably, this may sound incredible for the
>>>> general case, but may be required for not-so-special cases like
>>>> bridge or protocol analyzer.
>>>
>>> Wait, what setups/code require the skb not to be untagged? If there's
>>> such, it should be fixed.
>>
>> tcpdump on the non-vlan interface for one.
> bridge is another. More precisely, there is a difference between the
> following two setups:
>
> 1/ eth0 - eth0.100 - br0 - eth1.200 - eth1
>
> 2/ eth0 - br0 - eth1
>
> In case 1, it is normal and desirable for the bridge to see untagged skb.
>
> In case 2, it is desirable for the bridge to see untouched (possibly tagged)
> skb. If current bridge implementation is able to handle skb from which we
> removed a tag, in this situation, it means that bridge currently "fix
> improper untagging" by itself, by forcing re-tagging on output. I think is
> should not be the job of protocol handlers to fix this. Again, a generic
> feature should to it when necessary.
>
> Think of the following setups:
>
> 3/ eth0 - br0 - eth1.200 - eth1.
> 4/ eth0 - eth0.100 - br0 - eth1
>
> What if one expect this setup to add (3) or remove (4) one level of vlan
> nesting? This is precisely what this setup suggest. How can we instruct the
> bridge to do so? It is not the bridge responsibility to do any vlan
> processing. bridge is expected to... bridge !

I assumed that this untaging Jiri is implementing does not remove the
tag. It moves the information from skb->data to skb->vlan_tci, but the
information contained is not otherwise changing. All your examples
should work regardless of where the tag is stored.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  8:52                     ` Michał Mirosław
@ 2011-05-22  9:10                       ` Nicolas de Pesloüan
  2011-05-22  9:20                         ` Michał Mirosław
  0 siblings, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-22  9:10 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Jiri Pirko, Eric W. Biederman, Jesse Gross, Changli Gao,
	David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	andy

Le 22/05/2011 10:52, Michał Mirosław a écrit :
> 2011/5/22 Nicolas de Pesloüan<nicolas.2p.debian@gmail.com>:
>> Le 22/05/2011 08:34, Eric W. Biederman a écrit :
>>>
>>> Jiri Pirko<jpirko@redhat.com>    writes:
>>>
>>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>>>
>>>> <snip>
>>>>>
>>>>> And because some setups may still require the skb not to be untagged,
>>>>> may be we need the ability to re-tag the skb in some situations...
>>>>> When a protocol handler or rx_handler is explicitly registered on a
>>>>> net_device which expect to receive tagged skb, we should deliver
>>>>> tagged skb to it... Arguably, this may sound incredible for the
>>>>> general case, but may be required for not-so-special cases like
>>>>> bridge or protocol analyzer.
>>>>
>>>> Wait, what setups/code require the skb not to be untagged? If there's
>>>> such, it should be fixed.
>>>
>>> tcpdump on the non-vlan interface for one.
>> bridge is another. More precisely, there is a difference between the
>> following two setups:
>>
>> 1/ eth0 - eth0.100 - br0 - eth1.200 - eth1
>>
>> 2/ eth0 - br0 - eth1
>>
>> In case 1, it is normal and desirable for the bridge to see untagged skb.
>>
>> In case 2, it is desirable for the bridge to see untouched (possibly tagged)
>> skb. If current bridge implementation is able to handle skb from which we
>> removed a tag, in this situation, it means that bridge currently "fix
>> improper untagging" by itself, by forcing re-tagging on output. I think is
>> should not be the job of protocol handlers to fix this. Again, a generic
>> feature should to it when necessary.
>>
>> Think of the following setups:
>>
>> 3/ eth0 - br0 - eth1.200 - eth1.
>> 4/ eth0 - eth0.100 - br0 - eth1
>>
>> What if one expect this setup to add (3) or remove (4) one level of vlan
>> nesting? This is precisely what this setup suggest. How can we instruct the
>> bridge to do so? It is not the bridge responsibility to do any vlan
>> processing. bridge is expected to... bridge !
>
> I assumed that this untaging Jiri is implementing does not remove the
> tag. It moves the information from skb->data to skb->vlan_tci, but the
> information contained is not otherwise changing. All your examples
> should work regardless of where the tag is stored.

I assumed (but didn't tested) that this untagging also change the starting point of the payload of 
the packet. So protocol handlers expecting to have the raw packet won't see the vlan header.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  9:10                       ` Nicolas de Pesloüan
@ 2011-05-22  9:20                         ` Michał Mirosław
  2011-05-22  9:36                           ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Michał Mirosław @ 2011-05-22  9:20 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Jiri Pirko, Eric W. Biederman, Jesse Gross, Changli Gao,
	David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	andy

W dniu 22 maja 2011 11:10 użytkownik Nicolas de Pesloüan
<nicolas.2p.debian@gmail.com> napisał:
> Le 22/05/2011 10:52, Michał Mirosław a écrit :
>>
>> 2011/5/22 Nicolas de Pesloüan<nicolas.2p.debian@gmail.com>:
>>>
>>> Le 22/05/2011 08:34, Eric W. Biederman a écrit :
>>>>
>>>> Jiri Pirko<jpirko@redhat.com>    writes:
>>>>
>>>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com
>>>>> wrote:
>>>>>
>>>>> <snip>
>>>>>>
>>>>>> And because some setups may still require the skb not to be untagged,
>>>>>> may be we need the ability to re-tag the skb in some situations...
>>>>>> When a protocol handler or rx_handler is explicitly registered on a
>>>>>> net_device which expect to receive tagged skb, we should deliver
>>>>>> tagged skb to it... Arguably, this may sound incredible for the
>>>>>> general case, but may be required for not-so-special cases like
>>>>>> bridge or protocol analyzer.
>>>>>
>>>>> Wait, what setups/code require the skb not to be untagged? If there's
>>>>> such, it should be fixed.
>>>>
>>>> tcpdump on the non-vlan interface for one.
>>>
>>> bridge is another. More precisely, there is a difference between the
>>> following two setups:
>>>
>>> 1/ eth0 - eth0.100 - br0 - eth1.200 - eth1
>>>
>>> 2/ eth0 - br0 - eth1
>>>
>>> In case 1, it is normal and desirable for the bridge to see untagged skb.
>>>
>>> In case 2, it is desirable for the bridge to see untouched (possibly
>>> tagged)
>>> skb. If current bridge implementation is able to handle skb from which we
>>> removed a tag, in this situation, it means that bridge currently "fix
>>> improper untagging" by itself, by forcing re-tagging on output. I think
>>> is
>>> should not be the job of protocol handlers to fix this. Again, a generic
>>> feature should to it when necessary.
>>>
>>> Think of the following setups:
>>>
>>> 3/ eth0 - br0 - eth1.200 - eth1.
>>> 4/ eth0 - eth0.100 - br0 - eth1
>>>
>>> What if one expect this setup to add (3) or remove (4) one level of vlan
>>> nesting? This is precisely what this setup suggest. How can we instruct
>>> the
>>> bridge to do so? It is not the bridge responsibility to do any vlan
>>> processing. bridge is expected to... bridge !
>>
>> I assumed that this untaging Jiri is implementing does not remove the
>> tag. It moves the information from skb->data to skb->vlan_tci, but the
>> information contained is not otherwise changing. All your examples
>> should work regardless of where the tag is stored.
>
> I assumed (but didn't tested) that this untagging also change the starting
> point of the payload of the packet. So protocol handlers expecting to have
> the raw packet won't see the vlan header.

That would also be the case with hardware stripped tags - they need to
look into skb->vlan_tci anyway.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  9:20                         ` Michał Mirosław
@ 2011-05-22  9:36                           ` Jiri Pirko
  2011-05-22  9:53                             ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-05-22  9:36 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Nicolas de Pesloüan, Eric W. Biederman, Jesse Gross,
	Changli Gao, David Miller, netdev, shemminger, kaber, fubar,
	eric.dumazet, andy

Sun, May 22, 2011 at 11:20:09AM CEST, mirqus@gmail.com wrote:
>W dniu 22 maja 2011 11:10 użytkownik Nicolas de Pesloüan
><nicolas.2p.debian@gmail.com> napisał:
>> Le 22/05/2011 10:52, Michał Mirosław a écrit :
>>>
>>> 2011/5/22 Nicolas de Pesloüan<nicolas.2p.debian@gmail.com>:
>>>>
>>>> Le 22/05/2011 08:34, Eric W. Biederman a écrit :
>>>>>
>>>>> Jiri Pirko<jpirko@redhat.com>    writes:
>>>>>
>>>>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com
>>>>>> wrote:
>>>>>>
>>>>>> <snip>
>>>>>>>
>>>>>>> And because some setups may still require the skb not to be untagged,
>>>>>>> may be we need the ability to re-tag the skb in some situations...
>>>>>>> When a protocol handler or rx_handler is explicitly registered on a
>>>>>>> net_device which expect to receive tagged skb, we should deliver
>>>>>>> tagged skb to it... Arguably, this may sound incredible for the
>>>>>>> general case, but may be required for not-so-special cases like
>>>>>>> bridge or protocol analyzer.
>>>>>>
>>>>>> Wait, what setups/code require the skb not to be untagged? If there's
>>>>>> such, it should be fixed.
>>>>>
>>>>> tcpdump on the non-vlan interface for one.
>>>>
>>>> bridge is another. More precisely, there is a difference between the
>>>> following two setups:
>>>>
>>>> 1/ eth0 - eth0.100 - br0 - eth1.200 - eth1
>>>>
>>>> 2/ eth0 - br0 - eth1
>>>>
>>>> In case 1, it is normal and desirable for the bridge to see untagged skb.
>>>>
>>>> In case 2, it is desirable for the bridge to see untouched (possibly
>>>> tagged)
>>>> skb. If current bridge implementation is able to handle skb from which we
>>>> removed a tag, in this situation, it means that bridge currently "fix
>>>> improper untagging" by itself, by forcing re-tagging on output. I think
>>>> is
>>>> should not be the job of protocol handlers to fix this. Again, a generic
>>>> feature should to it when necessary.
>>>>
>>>> Think of the following setups:
>>>>
>>>> 3/ eth0 - br0 - eth1.200 - eth1.
>>>> 4/ eth0 - eth0.100 - br0 - eth1
>>>>
>>>> What if one expect this setup to add (3) or remove (4) one level of vlan
>>>> nesting? This is precisely what this setup suggest. How can we instruct
>>>> the
>>>> bridge to do so? It is not the bridge responsibility to do any vlan
>>>> processing. bridge is expected to... bridge !
>>>
>>> I assumed that this untaging Jiri is implementing does not remove the
>>> tag. It moves the information from skb->data to skb->vlan_tci, but the
>>> information contained is not otherwise changing. All your examples
>>> should work regardless of where the tag is stored.
>>
>> I assumed (but didn't tested) that this untagging also change the starting
>> point of the payload of the packet. So protocol handlers expecting to have
>> the raw packet won't see the vlan header.
>
>That would also be the case with hardware stripped tags - they need to
>look into skb->vlan_tci anyway.

Exactly. Nicolas, I do not see anything wrong on always untagging in all
your setups. As Michal said, vlan_tci keeps the info.

Jirka

>
>Best Regards,
>Michał Mirosław

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  8:38                 ` [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Changli Gao
@ 2011-05-22  9:37                   ` Jiri Pirko
  2011-05-22 10:17                     ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-05-22  9:37 UTC (permalink / raw)
  To: Changli Gao
  Cc: Nicolas de Pesloüan, Jesse Gross, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, ebiederm

Sun, May 22, 2011 at 10:38:45AM CEST, xiaosuo@gmail.com wrote:
>On Sun, May 22, 2011 at 2:29 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>
>> <snip>
>>>
>>>And because some setups may still require the skb not to be untagged,
>>>may be we need the ability to re-tag the skb in some situations...
>>>When a protocol handler or rx_handler is explicitly registered on a
>>>net_device which expect to receive tagged skb, we should deliver
>>>tagged skb to it... Arguably, this may sound incredible for the
>>>general case, but may be required for not-so-special cases like
>>>bridge or protocol analyzer.
>>
>> Wait, what setups/code require the skb not to be untagged? If there's
>> such, it should be fixed.
>>
>
>For a transparent bridge with ports: eth0 and eth1, the vlan tags
>need to be preserved.

Yet they are - in skb->vlan_tci. I see no problem here. It's the same as
if the NIC does hw-untagging itself.

Jirka
>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  9:36                           ` Jiri Pirko
@ 2011-05-22  9:53                             ` Nicolas de Pesloüan
  2011-05-22 10:04                               ` Michał Mirosław
  0 siblings, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-22  9:53 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Michał Mirosław, Eric W. Biederman, Jesse Gross,
	Changli Gao, David Miller, netdev, shemminger, kaber, fubar,
	eric.dumazet, andy

Le 22/05/2011 11:36, Jiri Pirko a écrit :
> Sun, May 22, 2011 at 11:20:09AM CEST, mirqus@gmail.com wrote:
>> W dniu 22 maja 2011 11:10 użytkownik Nicolas de Pesloüan
>> <nicolas.2p.debian@gmail.com>  napisał:
>>> Le 22/05/2011 10:52, Michał Mirosław a écrit :
>>>>
>>>> 2011/5/22 Nicolas de Pesloüan<nicolas.2p.debian@gmail.com>:
>>>>>
>>>>> Le 22/05/2011 08:34, Eric W. Biederman a écrit :
>>>>>>
>>>>>> Jiri Pirko<jpirko@redhat.com>      writes:
>>>>>>
>>>>>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com
>>>>>>> wrote:
>>>>>>>
>>>>>>> <snip>
>>>>>>>>
>>>>>>>> And because some setups may still require the skb not to be untagged,
>>>>>>>> may be we need the ability to re-tag the skb in some situations...
>>>>>>>> When a protocol handler or rx_handler is explicitly registered on a
>>>>>>>> net_device which expect to receive tagged skb, we should deliver
>>>>>>>> tagged skb to it... Arguably, this may sound incredible for the
>>>>>>>> general case, but may be required for not-so-special cases like
>>>>>>>> bridge or protocol analyzer.
>>>>>>>
>>>>>>> Wait, what setups/code require the skb not to be untagged? If there's
>>>>>>> such, it should be fixed.
>>>>>>
>>>>>> tcpdump on the non-vlan interface for one.
>>>>>
>>>>> bridge is another. More precisely, there is a difference between the
>>>>> following two setups:
>>>>>
>>>>> 1/ eth0 - eth0.100 - br0 - eth1.200 - eth1
>>>>>
>>>>> 2/ eth0 - br0 - eth1
>>>>>
>>>>> In case 1, it is normal and desirable for the bridge to see untagged skb.
>>>>>
>>>>> In case 2, it is desirable for the bridge to see untouched (possibly
>>>>> tagged)
>>>>> skb. If current bridge implementation is able to handle skb from which we
>>>>> removed a tag, in this situation, it means that bridge currently "fix
>>>>> improper untagging" by itself, by forcing re-tagging on output. I think
>>>>> is
>>>>> should not be the job of protocol handlers to fix this. Again, a generic
>>>>> feature should to it when necessary.
>>>>>
>>>>> Think of the following setups:
>>>>>
>>>>> 3/ eth0 - br0 - eth1.200 - eth1.
>>>>> 4/ eth0 - eth0.100 - br0 - eth1
>>>>>
>>>>> What if one expect this setup to add (3) or remove (4) one level of vlan
>>>>> nesting? This is precisely what this setup suggest. How can we instruct
>>>>> the
>>>>> bridge to do so? It is not the bridge responsibility to do any vlan
>>>>> processing. bridge is expected to... bridge !
>>>>
>>>> I assumed that this untaging Jiri is implementing does not remove the
>>>> tag. It moves the information from skb->data to skb->vlan_tci, but the
>>>> information contained is not otherwise changing. All your examples
>>>> should work regardless of where the tag is stored.
>>>
>>> I assumed (but didn't tested) that this untagging also change the starting
>>> point of the payload of the packet. So protocol handlers expecting to have
>>> the raw packet won't see the vlan header.
>>
>> That would also be the case with hardware stripped tags - they need to
>> look into skb->vlan_tci anyway.
>
> Exactly. Nicolas, I do not see anything wrong on always untagging in all
> your setups. As Michal said, vlan_tci keeps the info.

I understand this.

But I don't understand how the bridge code is expected to know whether it should re-tag the packet 
or not before forwarding and which value to use as the egress vlan tag.

1/ eth0 - br0 - eth1 : the bridge is expected to retag using skb->vlan_tci value.

2/ eth0 - eth0.100 - br0 - eth1.200 - eth1 : the bridge is expected to retag using a different value 
than skb->vlan_tci.

3/ eth0 - eth0.100 - br0 - eth1 : the bridge is expected not to re-tag, because the expected 
behavior of this setup is to untag while crossing the bridge.

4/ eth0 - eth0.100 - eth0.100.300 - br0 - eth1.400 - eth1.200 - eth1 : the bridge is expected to 
retag using a different value than skb->vlan_tci. What value would skb->vlan_tci hold when the skb 
will be delivered to the bridge? 100 or 300?

 From my point of view, in both setup, the bridge will receive a single value in skb->vlan_tci and 
will lack any other indication to help it decide how to retag when forwarding.

I'm not against your idea to mimic hw-accel in software. But I'm concern about troubles for those 
who expect to have access to untouched packet. Your patch didn't cause those troubles to start 
happening, but cause them to always happen. Before your patch, someone had the option to use 
non-hw-accel NIC or to disable hw-accel feature if possible. Now, it's no more possible.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  9:53                             ` Nicolas de Pesloüan
@ 2011-05-22 10:04                               ` Michał Mirosław
  0 siblings, 0 replies; 92+ messages in thread
From: Michał Mirosław @ 2011-05-22 10:04 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Jiri Pirko, Eric W. Biederman, Jesse Gross, Changli Gao,
	David Miller, netdev, shemminger, kaber, fubar, eric.dumazet,
	andy

2011/5/22 Nicolas de Pesloüan <nicolas.2p.debian@gmail.com>:
> Le 22/05/2011 11:36, Jiri Pirko a écrit :
>> Sun, May 22, 2011 at 11:20:09AM CEST, mirqus@gmail.com wrote:
>>> W dniu 22 maja 2011 11:10 użytkownik Nicolas de Pesloüan
>>> <nicolas.2p.debian@gmail.com>  napisał:
>>>> Le 22/05/2011 10:52, Michał Mirosław a écrit :
>>>>> I assumed that this untaging Jiri is implementing does not remove the
>>>>> tag. It moves the information from skb->data to skb->vlan_tci, but the
>>>>> information contained is not otherwise changing. All your examples
>>>>> should work regardless of where the tag is stored.
>>>> I assumed (but didn't tested) that this untagging also change the
>>>> starting
>>>> point of the payload of the packet. So protocol handlers expecting to
>>>> have
>>>> the raw packet won't see the vlan header.
>>> That would also be the case with hardware stripped tags - they need to
>>> look into skb->vlan_tci anyway.
>> Exactly. Nicolas, I do not see anything wrong on always untagging in all
>> your setups. As Michal said, vlan_tci keeps the info.
>
> I understand this.
>
> But I don't understand how the bridge code is expected to know whether it
> should re-tag the packet or not before forwarding and which value to use as
> the egress vlan tag.
>
> 1/ eth0 - br0 - eth1 : the bridge is expected to retag using skb->vlan_tci
> value.
>
> 2/ eth0 - eth0.100 - br0 - eth1.200 - eth1 : the bridge is expected to retag
> using a different value than skb->vlan_tci.

> 3/ eth0 - eth0.100 - br0 - eth1 : the bridge is expected not to re-tag,
> because the expected behavior of this setup is to untag while crossing the
> bridge.
>
> 4/ eth0 - eth0.100 - eth0.100.300 - br0 - eth1.400 - eth1.200 - eth1 : the
> bridge is expected to retag using a different value than skb->vlan_tci. What
> value would skb->vlan_tci hold when the skb will be delivered to the bridge?
> 100 or 300?
>
> From my point of view, in both setup, the bridge will receive a single value
> in skb->vlan_tci and will lack any other indication to help it decide how to
> retag when forwarding.

Packets looking like they came from eth0.100 will have skb->vlan_tci
cleared (like taking packet out of a tunnel) and then possibly filled
again with inner tag. It's really convenient to thing of VLANs as
tunnels.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  9:37                   ` Jiri Pirko
@ 2011-05-22 10:17                     ` Changli Gao
  2011-05-22 10:26                       ` Eric W. Biederman
  2011-05-22 13:16                       ` Jiri Pirko
  0 siblings, 2 replies; 92+ messages in thread
From: Changli Gao @ 2011-05-22 10:17 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Nicolas de Pesloüan, Jesse Gross, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, ebiederm

On Sun, May 22, 2011 at 5:37 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>>
>>For a transparent bridge with ports: eth0 and eth1, the vlan tags
>>need to be preserved.
>
> Yet they are - in skb->vlan_tci. I see no problem here. It's the same as
> if the NIC does hw-untagging itself.
>

Although they are in skb->vlan_tci, but the xmit nic, which doesn't
support hw-accel-vlan-tx, doesn't use them to tag the outgoing
packets. Correct me if I am wrong.

I know the current software emulation doesn't strip the vlan headers,
but I want to know in what way you fix the current misuse of
vlan_check_reorder_header(). If you emulation what the HW does
exactly, I am afraid more work are needed. IMHO, this way is more
dangerous than just a revert for this bugfix cycle.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22 10:17                     ` Changli Gao
@ 2011-05-22 10:26                       ` Eric W. Biederman
  2011-05-22 10:40                         ` Changli Gao
  2011-05-22 13:16                       ` Jiri Pirko
  1 sibling, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 10:26 UTC (permalink / raw)
  To: Changli Gao
  Cc: Jiri Pirko, Nicolas de Pesloüan, Jesse Gross, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

Changli Gao <xiaosuo@gmail.com> writes:

> On Sun, May 22, 2011 at 5:37 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>>>
>>>For a transparent bridge with ports: eth0 and eth1, the vlan tags
>>>need to be preserved.
>>
>> Yet they are - in skb->vlan_tci. I see no problem here. It's the same as
>> if the NIC does hw-untagging itself.
>>
>
> Although they are in skb->vlan_tci, but the xmit nic, which doesn't
> support hw-accel-vlan-tx, doesn't use them to tag the outgoing
> packets. Correct me if I am wrong.

There is software emulation in dev_hard_start_xmit for the nics
that don't support the feature.  So we will properly add tags.

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22 10:26                       ` Eric W. Biederman
@ 2011-05-22 10:40                         ` Changli Gao
  0 siblings, 0 replies; 92+ messages in thread
From: Changli Gao @ 2011-05-22 10:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jiri Pirko, Nicolas de Pesloüan, Jesse Gross, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

On Sun, May 22, 2011 at 6:26 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>
> There is software emulation in dev_hard_start_xmit for the nics
> that don't support the feature.  So we will properly add tags.
>

Thanks. I got it. :)


-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22 10:17                     ` Changli Gao
  2011-05-22 10:26                       ` Eric W. Biederman
@ 2011-05-22 13:16                       ` Jiri Pirko
  1 sibling, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-05-22 13:16 UTC (permalink / raw)
  To: Changli Gao
  Cc: Nicolas de Pesloüan, Jesse Gross, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, ebiederm

<snip>

>
>I know the current software emulation doesn't strip the vlan headers,
>but I want to know in what way you fix the current misuse of
>vlan_check_reorder_header(). If you emulation what the HW does
>exactly, I am afraid more work are needed. IMHO, this way is more
>dangerous than just a revert for this bugfix cycle.

Looking at this. Will follow-up soon.

>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22  6:34                 ` Eric W. Biederman
  2011-05-22  8:34                   ` Nicolas de Pesloüan
@ 2011-05-22 16:11                   ` Jesse Gross
  2011-05-22 18:24                     ` Eric W. Biederman
  2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
  1 sibling, 2 replies; 92+ messages in thread
From: Jesse Gross @ 2011-05-22 16:11 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

On Sat, May 21, 2011 at 11:34 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Jiri Pirko <jpirko@redhat.com> writes:
>
>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>
>> <snip>
>>>
>>>And because some setups may still require the skb not to be untagged,
>>>may be we need the ability to re-tag the skb in some situations...
>>>When a protocol handler or rx_handler is explicitly registered on a
>>>net_device which expect to receive tagged skb, we should deliver
>>>tagged skb to it... Arguably, this may sound incredible for the
>>>general case, but may be required for not-so-special cases like
>>>bridge or protocol analyzer.
>>
>> Wait, what setups/code require the skb not to be untagged? If there's
>> such, it should be fixed.
>
> tcpdump on the non-vlan interface for one.

There are some drivers still using the old vlan model that will drop
tags or packets when no vlan group is configured but that's a driver
problem, not one with networking core or tcpdump.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22 16:11                   ` Jesse Gross
@ 2011-05-22 18:24                     ` Eric W. Biederman
  2011-05-22 19:33                       ` Eric W. Biederman
  2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
  1 sibling, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 18:24 UTC (permalink / raw)
  To: Jesse Gross
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

Jesse Gross <jesse@nicira.com> writes:

> On Sat, May 21, 2011 at 11:34 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Jiri Pirko <jpirko@redhat.com> writes:
>>
>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>>
>>> <snip>
>>>>
>>>>And because some setups may still require the skb not to be untagged,
>>>>may be we need the ability to re-tag the skb in some situations...
>>>>When a protocol handler or rx_handler is explicitly registered on a
>>>>net_device which expect to receive tagged skb, we should deliver
>>>>tagged skb to it... Arguably, this may sound incredible for the
>>>>general case, but may be required for not-so-special cases like
>>>>bridge or protocol analyzer.
>>>
>>> Wait, what setups/code require the skb not to be untagged? If there's
>>> such, it should be fixed.
>>
>> tcpdump on the non-vlan interface for one.
>
> There are some drivers still using the old vlan model that will drop
> tags or packets when no vlan group is configured but that's a driver
> problem, not one with networking core or tcpdump.

On receive if we have stripped the vlan header and then we go to deliver
the interrupt to a pf_packet socket (on a non-vlan interface) before
or as part of the deliver of the packet to user space we need to
re-add the vlan header.  Additionally the socket filter on a pf_packet
socket needs to behave as though we have a vlan header.

So no I am not talking about anything that is driver specific.  I am
talking about reasonable userspace expectations.  Because otherwise
we simply loose the information that a packet was vlan tagged, and
in doing so we break existing userspace applications because of our
bugs.



Eric




^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel
  2011-05-22 18:24                     ` Eric W. Biederman
@ 2011-05-22 19:33                       ` Eric W. Biederman
  0 siblings, 0 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 19:33 UTC (permalink / raw)
  To: Jesse Gross
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, David Miller,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy

ebiederm@xmission.com (Eric W. Biederman) writes:

> Jesse Gross <jesse@nicira.com> writes:
>
>> On Sat, May 21, 2011 at 11:34 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> Jiri Pirko <jpirko@redhat.com> writes:
>>>
>>>> Sun, May 22, 2011 at 04:59:49AM CEST, nicolas.2p.debian@gmail.com wrote:
>>>>
>>>> <snip>
>>>>>
>>>>>And because some setups may still require the skb not to be untagged,
>>>>>may be we need the ability to re-tag the skb in some situations...
>>>>>When a protocol handler or rx_handler is explicitly registered on a
>>>>>net_device which expect to receive tagged skb, we should deliver
>>>>>tagged skb to it... Arguably, this may sound incredible for the
>>>>>general case, but may be required for not-so-special cases like
>>>>>bridge or protocol analyzer.
>>>>
>>>> Wait, what setups/code require the skb not to be untagged? If there's
>>>> such, it should be fixed.
>>>
>>> tcpdump on the non-vlan interface for one.
>>
>> There are some drivers still using the old vlan model that will drop
>> tags or packets when no vlan group is configured but that's a driver
>> problem, not one with networking core or tcpdump.
>
> On receive if we have stripped the vlan header and then we go to deliver
> the interrupt to a pf_packet socket (on a non-vlan interface) before
> or as part of the deliver of the packet to user space we need to
> re-add the vlan header.  Additionally the socket filter on a pf_packet
> socket needs to behave as though we have a vlan header.

Hmm.  Taking a second look the pf_packet code and with hardware vlan
header removal isn't completely broken.  It is possible to receive
packet auxdata and get the information from the vlan header.

It stills seems like a pretty messed up interface to me.  Especially
since the socket filter can't get at the information in the stripped
vlan header, but that is another matter entirely.

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 16:11                   ` Jesse Gross
  2011-05-22 18:24                     ` Eric W. Biederman
@ 2011-05-22 19:39                     ` Eric W. Biederman
  2011-05-22 19:40                       ` [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag Eric W. Biederman
                                         ` (2 more replies)
  1 sibling, 3 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 19:39 UTC (permalink / raw)
  To: David Miller
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross


Simplify the vlan handling code by not supporing clearing of
VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
code strip the vlan header from the packets, and always insert the vlan
header when transmitting packets.

Not stripping the vlan header has alwasy been broken in combination with
vlan hardware accelleration.  Now that we are making everything look
like accelerated vlan handling not stripping the vlan header is always
broken.

I don't think anyone actually cares so simply stop supporting the broken
case.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 net/8021q/vlan_dev.c     |    3 +--
 net/8021q/vlan_netlink.c |    4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f247f5b..20629fe 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -242,8 +242,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	u32 old_flags = vlan->flags;
 
-	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		     VLAN_FLAG_LOOSE_BINDING))
+	if (mask & ~(VLAN_FLAG_GVRP | VLAN_FLAG_LOOSE_BINDING))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index be9a5c1..a638a4c 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -59,9 +59,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	}
 	if (data[IFLA_VLAN_FLAGS]) {
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
-		if ((flags->flags & flags->mask) &
-		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		      VLAN_FLAG_LOOSE_BINDING))
+		if (flags->mask & ~(VLAN_FLAG_GVRP | VLAN_FLAG_LOOSE_BINDING))
 			return -EINVAL;
 	}
 
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag.
  2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
@ 2011-05-22 19:40                       ` Eric W. Biederman
  2011-05-22 19:42                         ` [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set Eric W. Biederman
  2011-05-22 21:04                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Ben Greear
  2011-06-09 11:00                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Jiri Pirko
  2 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 19:40 UTC (permalink / raw)
  To: David Miller
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross


Calling vlan_dev_info() on a non-vlan device gives completely undefined
results, and is no longer needed now that we no longer support keeping
the vlan tag.

Simplify and correct the code by remove the check for VLAN_FLAG_REORDER_HDR.

Additionally rename vlan_check_reorder_header to just
vlan_reorder_header as there is no check involved now.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 net/8021q/vlan_core.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc..302d8e3 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -89,17 +89,15 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_frags);
 
-static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		skb = NULL;
+	if (skb) {
+		/* Lifted from Gleb's VLAN code... */
+		memmove(skb->data - ETH_HLEN,
+			skb->data - VLAN_ETH_HLEN, 12);
+		skb->mac_header += VLAN_HLEN;
 	}
 	return skb;
 }
@@ -161,7 +159,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
+	skb = vlan_reorder_header(skb);
 	if (unlikely(!skb))
 		goto err_free;
 
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set
  2011-05-22 19:40                       ` [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag Eric W. Biederman
@ 2011-05-22 19:42                         ` Eric W. Biederman
  2011-06-09 10:59                           ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 19:42 UTC (permalink / raw)
  To: David Miller
  Cc: Jiri Pirko, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross


Now that we no longer support clearing VLAN_FLAG_REORDER_HDR remove the
code that was needed to cope with the case when it was cleared.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 net/8021q/vlan_dev.c |   45 +++++----------------------------------------
 1 files changed, 5 insertions(+), 40 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 20629fe..2b3ca1e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -96,63 +96,28 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				const void *daddr, const void *saddr,
 				unsigned int len)
 {
-	struct vlan_hdr *vhdr;
-	unsigned int vhdrlen = 0;
-	u16 vlan_tci = 0;
 	int rc;
 
-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
-		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
-
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		vhdr->h_vlan_TCI = htons(vlan_tci);
-
-		/*
-		 *  Set the protocol type. For a packet of type ETH_P_802_3/2 we
-		 *  put the length in here instead.
-		 */
-		if (type != ETH_P_802_3 && type != ETH_P_802_2)
-			vhdr->h_vlan_encapsulated_proto = htons(type);
-		else
-			vhdr->h_vlan_encapsulated_proto = htons(len);
-
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
-		vhdrlen = VLAN_HLEN;
-	}
-
 	/* Before delegating work to the lower layer, enter our MAC-address */
 	if (saddr == NULL)
 		saddr = dev->dev_addr;
 
 	/* Now make the underlying real hard header */
 	dev = vlan_dev_info(dev)->real_dev;
-	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
-	if (rc > 0)
-		rc += vhdrlen;
+	rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
 	return rc;
 }
 
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
-	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
+	u16 vlan_tci;
 	int ret;
 
-	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
-	 *
-	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
-	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
-	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		u16 vlan_tci;
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
-	}
+	vlan_tci = vlan_dev_info(dev)->vlan_id;
+	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 
 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
 	len = skb->len;
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
  2011-05-22 19:40                       ` [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag Eric W. Biederman
@ 2011-05-22 21:04                       ` Ben Greear
  2011-05-22 22:38                         ` Eric W. Biederman
  2011-06-09 11:00                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Jiri Pirko
  2 siblings, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-22 21:04 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Jiri Pirko, Nicolas de Pesloüan, Changli Gao,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On 05/22/2011 12:39 PM, Eric W. Biederman wrote:
>
> Simplify the vlan handling code by not supporing clearing of
> VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
> code strip the vlan header from the packets, and always insert the vlan
> header when transmitting packets.
>
> Not stripping the vlan header has alwasy been broken in combination with
> vlan hardware accelleration.  Now that we are making everything look
> like accelerated vlan handling not stripping the vlan header is always
> broken.
>
> I don't think anyone actually cares so simply stop supporting the broken
> case.

I've lost track of the VLAN code a bit.  Is there any documentation
somewhere about what happens in these various cases:

*  Open a raw packet socket on eth0.
   *  Do we get tagged VLAN packets?  (I'd expect yes.)
   *  If we sent a tagged VLAN packet, it's sent without modification?  (I'd expect yes.)
   **  Without "yes" to the two above, one cannot do user-space bridging properly.

*  Open a raw packet socket on VLAN eth0.5
   *  Do we get tagged VLAN packets?  (I'd expect no.)
   *  If we send an un-tagged packet, I expect it would be tagged?
   *  What if we sent a tagged packet with same VID?
   *  With different VID?

Many years ago we supported the REORDER, but we suggested disabling
it for most users because it was a performance drag.  Funny that now
it seems to be the opposite!

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 21:04                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Ben Greear
@ 2011-05-22 22:38                         ` Eric W. Biederman
  2011-05-23  0:38                           ` Changli Gao
  2011-05-23  6:01                           ` Ben Greear
  0 siblings, 2 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-22 22:38 UTC (permalink / raw)
  To: Ben Greear
  Cc: David Miller, Jiri Pirko, Nicolas de Pesloüan, Changli Gao,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Ben Greear <greearb@candelatech.com> writes:

> On 05/22/2011 12:39 PM, Eric W. Biederman wrote:
>>
>> Simplify the vlan handling code by not supporing clearing of
>> VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
>> code strip the vlan header from the packets, and always insert the vlan
>> header when transmitting packets.
>>
>> Not stripping the vlan header has alwasy been broken in combination with
>> vlan hardware accelleration.  Now that we are making everything look
>> like accelerated vlan handling not stripping the vlan header is always
>> broken.
>>
>> I don't think anyone actually cares so simply stop supporting the broken
>> case.
>
> I've lost track of the VLAN code a bit.  Is there any documentation
> somewhere about what happens in these various cases:

Other than the code I don't know about documentation.

> *  Open a raw packet socket on eth0.
I assume you mean a pf_packet socket.

>   *  Do we get tagged VLAN packets?  (I'd expect yes.)
yes.

>   *  If we sent a tagged VLAN packet, it's sent without modification?  (I'd expect yes.)
>   **  Without "yes" to the two above, one cannot do user-space bridging properly.

This is sort of.  If you set the PACKET_AUXDATA option and use recv_cmsg
you get the priority and the vlan identifier in the auxdata.

I think that is a pretty horrible answer myself but it has been that way
since sometime mid 2008.  So I'm not immediately prepared to call it
a regression, or a bug.

> *  Open a raw packet socket on VLAN eth0.5
>   *  Do we get tagged VLAN packets?  (I'd expect no.)
No.

>   *  If we send an un-tagged packet, I expect it would be tagged?
Yes.

>   *  What if we sent a tagged packet with same VID?
>   *  With different VID?
The vlan tag is ignored and another vlan tag is added.

> Many years ago we supported the REORDER, but we suggested disabling
> it for most users because it was a performance drag.  Funny that now
> it seems to be the opposite!

Yes it is funny.  I looked in history a while back and what I saw was
that REORDER was always enabled by default and it took some serious
effort to figure out how to get vconfig to disable REORDER. ip doesn't
admit that REORDER can be disabled at all.

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 22:38                         ` Eric W. Biederman
@ 2011-05-23  0:38                           ` Changli Gao
  2011-05-23  1:26                             ` Changli Gao
  2011-05-23  1:39                             ` Eric W. Biederman
  2011-05-23  6:01                           ` Ben Greear
  1 sibling, 2 replies; 92+ messages in thread
From: Changli Gao @ 2011-05-23  0:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On Mon, May 23, 2011 at 6:38 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>
>> Many years ago we supported the REORDER, but we suggested disabling
>> it for most users because it was a performance drag.  Funny that now
>> it seems to be the opposite!
>
> Yes it is funny.  I looked in history a while back and what I saw was
> that REORDER was always enabled by default and it took some serious
> effort to figure out how to get vconfig to disable REORDER. ip doesn't
> admit that REORDER can be disabled at all.
>

Really?

Quoted from the manual page of vconfig
       set_flag [vlan-device] 0 | 1
              When  1,  ethernet  header  reorders  are turned on. Dumping the
              device will appear as a common ethernet  device  without  vlans.
              When  0(default)  however,  ethernet  headers are not reordered,
              which results in vlan tagged packets when  dumping  the  device.
              Usually the default gives no problems, but some packet filtering
              programs might have problems with it.

reordered is disabled by default. I also concern the performance.
Untag and then tag are expensive for the NICs which don't support
hw-accel-vlan-rx/tx.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  0:38                           ` Changli Gao
@ 2011-05-23  1:26                             ` Changli Gao
  2011-05-23  1:45                               ` Eric W. Biederman
  2011-05-23  1:39                             ` Eric W. Biederman
  1 sibling, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-05-23  1:26 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On Mon, May 23, 2011 at 8:38 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> On Mon, May 23, 2011 at 6:38 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>>
>>> Many years ago we supported the REORDER, but we suggested disabling
>>> it for most users because it was a performance drag.  Funny that now
>>> it seems to be the opposite!
>>
>> Yes it is funny.  I looked in history a while back and what I saw was
>> that REORDER was always enabled by default and it took some serious
>> effort to figure out how to get vconfig to disable REORDER. ip doesn't
>> admit that REORDER can be disabled at all.
>>
>
> Really?
>
> Quoted from the manual page of vconfig
>       set_flag [vlan-device] 0 | 1
>              When  1,  ethernet  header  reorders  are turned on. Dumping the
>              device will appear as a common ethernet  device  without  vlans.
>              When  0(default)  however,  ethernet  headers are not reordered,
>              which results in vlan tagged packets when  dumping  the  device.
>              Usually the default gives no problems, but some packet filtering
>              programs might have problems with it.
>
> reordered is disabled by default. I also concern the performance.
> Untag and then tag are expensive for the NICs which don't support
> hw-accel-vlan-rx/tx.
>

For ip:
localhost ~ # ip link add link eth0 vlan1 type vlan help
Usage: ... vlan id VLANID [ FLAG-LIST ]
                          [ ingress-qos-map QOS-MAP ] [ egress-qos-map QOS-MAP ]

VLANID := 0-4095
FLAG-LIST := [ FLAG-LIST ] FLAG
FLAG := [ reorder_hdr { on | off } ] [ gvrp { on | off } ]
        [ loose_binding { on | off } ]
QOS-MAP := [ QOS-MAP ] QOS-MAPPING
QOS-MAPPING := FROM:TO

After checking the code, I found reorder_hdr is off by default.

In another side, is there a specification which defines the hw-accel-vlan-rx?

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  0:38                           ` Changli Gao
  2011-05-23  1:26                             ` Changli Gao
@ 2011-05-23  1:39                             ` Eric W. Biederman
  1 sibling, 0 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23  1:39 UTC (permalink / raw)
  To: Changli Gao
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Changli Gao <xiaosuo@gmail.com> writes:

> On Mon, May 23, 2011 at 6:38 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>>
>>> Many years ago we supported the REORDER, but we suggested disabling
>>> it for most users because it was a performance drag.  Funny that now
>>> it seems to be the opposite!
>>
>> Yes it is funny.  I looked in history a while back and what I saw was
>> that REORDER was always enabled by default and it took some serious
>> effort to figure out how to get vconfig to disable REORDER. ip doesn't
>> admit that REORDER can be disabled at all.
>>
>
> Really?

> Quoted from the manual page of vconfig
>        set_flag [vlan-device] 0 | 1
>               When  1,  ethernet  header  reorders  are turned on. Dumping the
>               device will appear as a common ethernet  device  without  vlans.
>               When  0(default)  however,  ethernet  headers are not reordered,
>               which results in vlan tagged packets when  dumping  the  device.
>               Usually the default gives no problems, but some packet filtering
>               programs might have problems with it.
>
> reordered is disabled by default. I also concern the performance.
> Untag and then tag are expensive for the NICs which don't support
> hw-accel-vlan-rx/tx.

Yes really the vconfig man page is wrong.  Reorder has been enabled
by default since the code was merged.  See below.

Dhcp among other things does not work if you disable reorder.

As for performace we are moving 12 bytes which should be cache hot
4 bytes later in what should be the same cache line.  I expect my
16Mhz 386 will slow down a little for an operations like that, I
don't expect much else will.  I can't see anything short of benchmark
numbers being persuasive.

The other reality is that the code is honestly and truly broken in
the corner cases.  If we don't consolidate the code paths it will
never operate correctly.

Not that I agree 100% with all of the decisions but the code has
been broken for a while for most users so we really need to make
it consistent and fix the bugs.

Furthermore to the best of my knowledge no one actually clears
the reorder bit.  Changli the fact that you don't know that the
reorder bit is set by default suggest that you don't clear it
in the cases you are concerned about.

If no one is clearing the reorder bit in practice all that we
really have for the vlan changes are code motion and simplification
which should only have a positive impact on performance.

Eric

Aka the vlan code came in at:

commit 15a435fe2c0f649e2879e51e05fa466c2bb12731
Author: torvalds <torvalds>
Date:   Tue Feb 5 20:30:01 2002 +0000

    v2.4.13.5 -> v2.4.13.6
    
      - me: remember to bump the version number ;)
      - Hugh Dickins: export "free_lru_page()" for modules
      - Jeff Garzik: don't change nopage arguments, just make the last a dummy one
      - David Miller: sparc and net updates (netfilter, VLAN etc)
      - Nikita Danilov: reiserfs cleanups
      - Jan Kara: quota initialization race
      - Tigran Aivazian: make the x86 microcode update driver happy about
      hyperthreaded P4's
      - me: shrink dcache/icache more aggressively
      - me: fix up oom-killer so that it actually works
    
    BKrev: 3c6040c9uewuD0S9AwFCfH3_YMyRHQ

[snip]
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
new file mode 100644
index 0000000..e549e33
--- /dev/null
+++ b/net/8021q/vlan.c
[snip]
+/* DO reorder the header by default */
+unsigned short vlan_default_dev_flags = 1;

[snip]
+/*  Attach a VLAN device to a mac address (ie Ethernet Card).
+ *  Returns the device that was created, or NULL if there was
+ *  an error of some kind.
+ */
+struct net_device *register_802_1Q_vlan_device(const char* eth_IF_name,
+					       unsigned short VLAN_ID)
+{
[snip]
+	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through 0xFFF */
+	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
+	VLAN_DEV_INFO(new_dev)->dent = NULL;
+	VLAN_DEV_INFO(new_dev)->flags = vlan_default_dev_flags;
[snip]

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  1:26                             ` Changli Gao
@ 2011-05-23  1:45                               ` Eric W. Biederman
  2011-05-23  2:14                                 ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23  1:45 UTC (permalink / raw)
  To: Changli Gao
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Changli Gao <xiaosuo@gmail.com> writes:

> On Mon, May 23, 2011 at 8:38 AM, Changli Gao <xiaosuo@gmail.com> wrote:
>> On Mon, May 23, 2011 at 6:38 AM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>>
>>>> Many years ago we supported the REORDER, but we suggested disabling
>>>> it for most users because it was a performance drag.  Funny that now
>>>> it seems to be the opposite!
>>>
>>> Yes it is funny.  I looked in history a while back and what I saw was
>>> that REORDER was always enabled by default and it took some serious
>>> effort to figure out how to get vconfig to disable REORDER. ip doesn't
>>> admit that REORDER can be disabled at all.
>>>
>>
>> Really?
>>
>> Quoted from the manual page of vconfig
>>       set_flag [vlan-device] 0 | 1
>>              When  1,  ethernet  header  reorders  are turned on. Dumping the
>>              device will appear as a common ethernet  device  without  vlans.
>>              When  0(default)  however,  ethernet  headers are not reordered,
>>              which results in vlan tagged packets when  dumping  the  device.
>>              Usually the default gives no problems, but some packet filtering
>>              programs might have problems with it.
>>
>> reordered is disabled by default. I also concern the performance.
>> Untag and then tag are expensive for the NICs which don't support
>> hw-accel-vlan-rx/tx.
>>
>
> For ip:
> localhost ~ # ip link add link eth0 vlan1 type vlan help
> Usage: ... vlan id VLANID [ FLAG-LIST ]
>                           [ ingress-qos-map QOS-MAP ] [ egress-qos-map QOS-MAP ]

Apparently I was blind when I looked at iproute.  I am certain I didn't
see it there but iproute clearly has the option to set or clear reorder_hdr.

> VLANID := 0-4095
> FLAG-LIST := [ FLAG-LIST ] FLAG
> FLAG := [ reorder_hdr { on | off } ] [ gvrp { on | off } ]
>         [ loose_binding { on | off } ]
> QOS-MAP := [ QOS-MAP ] QOS-MAPPING
> QOS-MAPPING := FROM:TO
>
> After checking the code, I found reorder_hdr is off by default.

In iproute reorder_hdr is not modified by default which is subtly
different.

> In another side, is there a specification which defines the
> hw-accel-vlan-rx?

I don't know.

I have just been trying to clean up the mess since some of the
hw-accel-vlan code broke my use case, by delivering packets with
priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.

Eric


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  1:45                               ` Eric W. Biederman
@ 2011-05-23  2:14                                 ` Changli Gao
  2011-05-23  9:41                                   ` Eric W. Biederman
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-05-23  2:14 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On Mon, May 23, 2011 at 9:45 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>> In another side, is there a specification which defines the
>> hw-accel-vlan-rx?
>
> I don't know.
>
> I have just been trying to clean up the mess since some of the
> hw-accel-vlan code broke my use case, by delivering packets with
> priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.
>

OK. But if we have decided to simulate the hw-accel-vlan-rx, I think
we'd better adjust the place where we put the emulation code. The very
beginnings of netif_rx() and neif_receive_skb() are better. Then rps
can support vlan packets without any change.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 22:38                         ` Eric W. Biederman
  2011-05-23  0:38                           ` Changli Gao
@ 2011-05-23  6:01                           ` Ben Greear
  2011-05-23  9:00                             ` Eric W. Biederman
  1 sibling, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-23  6:01 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Jiri Pirko, Nicolas de Pesloüan, Changli Gao,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On 05/22/2011 03:38 PM, Eric W. Biederman wrote:
> Ben Greear<greearb@candelatech.com>  writes:
>
>> On 05/22/2011 12:39 PM, Eric W. Biederman wrote:
>>>
>>> Simplify the vlan handling code by not supporing clearing of
>>> VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
>>> code strip the vlan header from the packets, and always insert the vlan
>>> header when transmitting packets.
>>>
>>> Not stripping the vlan header has alwasy been broken in combination with
>>> vlan hardware accelleration.  Now that we are making everything look
>>> like accelerated vlan handling not stripping the vlan header is always
>>> broken.
>>>
>>> I don't think anyone actually cares so simply stop supporting the broken
>>> case.
>>
>> I've lost track of the VLAN code a bit.  Is there any documentation
>> somewhere about what happens in these various cases:
>
> Other than the code I don't know about documentation.

These cases are tricky and probably have changed over
the years.  It would be nice to have it written down
somewhere, even if just in comments somewhere in the
VLAN code.

>
>> *  Open a raw packet socket on eth0.
> I assume you mean a pf_packet socket.

Yes.

>>    *  Do we get tagged VLAN packets?  (I'd expect yes.)
> yes.
>
>>    *  If we sent a tagged VLAN packet, it's sent without modification?  (I'd expect yes.)
>>    **  Without "yes" to the two above, one cannot do user-space bridging properly.
>
> This is sort of.  If you set the PACKET_AUXDATA option and use recv_cmsg
> you get the priority and the vlan identifier in the auxdata.
>
> I think that is a pretty horrible answer myself but it has been that way
> since sometime mid 2008.  So I'm not immediately prepared to call it
> a regression, or a bug.

I believe we have been getting tagged VLAN packets properly
in our test cases.  We would not be creating any VLAN devices
in this case, so perhaps the NIC isn't doing any stripping.

To me, it seems like we should get the fully tagged packet
without having to go muck with aux-data, though it would
be fine if it were *also* in aux-data.

I'll try to test this again this coming week to make sure
it's working like I think it is.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  6:01                           ` Ben Greear
@ 2011-05-23  9:00                             ` Eric W. Biederman
  2011-05-23 16:33                               ` Ben Greear
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23  9:00 UTC (permalink / raw)
  To: Ben Greear
  Cc: David Miller, Jiri Pirko, Nicolas de Pesloüan, Changli Gao,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Ben Greear <greearb@candelatech.com> writes:

> On 05/22/2011 03:38 PM, Eric W. Biederman wrote:
>> Ben Greear<greearb@candelatech.com>  writes:
>>
>>> On 05/22/2011 12:39 PM, Eric W. Biederman wrote:
>>>>
>>>> Simplify the vlan handling code by not supporing clearing of
>>>> VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
>>>> code strip the vlan header from the packets, and always insert the vlan
>>>> header when transmitting packets.
>>>>
>>>> Not stripping the vlan header has alwasy been broken in combination with
>>>> vlan hardware accelleration.  Now that we are making everything look
>>>> like accelerated vlan handling not stripping the vlan header is always
>>>> broken.
>>>>
>>>> I don't think anyone actually cares so simply stop supporting the broken
>>>> case.
>>>
>>> I've lost track of the VLAN code a bit.  Is there any documentation
>>> somewhere about what happens in these various cases:
>>
>> Other than the code I don't know about documentation.
>
> These cases are tricky and probably have changed over
> the years.  It would be nice to have it written down
> somewhere, even if just in comments somewhere in the
> VLAN code.
>
>>
>>> *  Open a raw packet socket on eth0.
>> I assume you mean a pf_packet socket.
>
> Yes.
>
>>>    *  Do we get tagged VLAN packets?  (I'd expect yes.)
>> yes.
>>
>>>    *  If we sent a tagged VLAN packet, it's sent without modification?  (I'd expect yes.)
>>>    **  Without "yes" to the two above, one cannot do user-space bridging properly.
>>
>> This is sort of.  If you set the PACKET_AUXDATA option and use recv_cmsg
>> you get the priority and the vlan identifier in the auxdata.
>>
>> I think that is a pretty horrible answer myself but it has been that way
>> since sometime mid 2008.  So I'm not immediately prepared to call it
>> a regression, or a bug.
>
> I believe we have been getting tagged VLAN packets properly
> in our test cases.  We would not be creating any VLAN devices
> in this case, so perhaps the NIC isn't doing any stripping.
>
> To me, it seems like we should get the fully tagged packet
> without having to go muck with aux-data, though it would
> be fine if it were *also* in aux-data.

Given that pf_packet is a portable interface that works on multiple OS's
I tend to agree.  Certainly my users would be happier if they don't
have to change their code and not having to change tcpdump would
also be nice.

I'm not certain exactly where in the code it makes sense to put the
vlan header back on for pf_packet sockets.  The simplest thing would
be just before we run the socket filter.  If we don't do the simplest
thing this raises the question how do we avoid breaking socket filters
that look at the packet data and know there is going to be a vlan
header there.

Still the current situation is better than seeing vlan 0 tagged packets
twice.

My gut feel says if we can cheaply get the socket filters to act like it
sees the vlan tag (where the vlan tag belongs) we should not actually
put the vlan tag back on until we copy the packet to userspace.

Having the perspective of someone who cares whose hardware supports the
vlan tagging optimizations would be nice to have.

> I'll try to test this again this coming week to make sure
> it's working like I think it is.

Thanks.


Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  2:14                                 ` Changli Gao
@ 2011-05-23  9:41                                   ` Eric W. Biederman
  2011-05-23 10:43                                     ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23  9:41 UTC (permalink / raw)
  To: Changli Gao
  Cc: Ben Greear, David Miller, Jiri Pirko, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Changli Gao <xiaosuo@gmail.com> writes:

> On Mon, May 23, 2011 at 9:45 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>>> In another side, is there a specification which defines the
>>> hw-accel-vlan-rx?
>>
>> I don't know.
>>
>> I have just been trying to clean up the mess since some of the
>> hw-accel-vlan code broke my use case, by delivering packets with
>> priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.
>>
>
> OK. But if we have decided to simulate the hw-accel-vlan-rx, I think
> we'd better adjust the place where we put the emulation code. The very
> beginnings of netif_rx() and neif_receive_skb() are better. Then rps
> can support vlan packets without any change.

That sounds nice. Patches are welcome.

In principle it should be doable with some code motion.  I don't think
moving vlan_untag earlier constitutes a bug fix.

In my investigation earlier I found a non-trivial number of paths into
__netif_receive_skb.  So it was not clear to me in the slightest how to
move the check earlier without modifying every networking driver and a
few other pieces of code.

Why should receive packet steering be affected by vlan tags at all?

Eric


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  9:41                                   ` Eric W. Biederman
@ 2011-05-23 10:43                                     ` Jiri Pirko
  2011-05-23 19:48                                       ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-05-23 10:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Changli Gao, Ben Greear, David Miller, Nicolas de Pesloüan,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

Mon, May 23, 2011 at 11:41:22AM CEST, ebiederm@xmission.com wrote:
>Changli Gao <xiaosuo@gmail.com> writes:
>
>> On Mon, May 23, 2011 at 9:45 AM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>>> In another side, is there a specification which defines the
>>>> hw-accel-vlan-rx?
>>>
>>> I don't know.
>>>
>>> I have just been trying to clean up the mess since some of the
>>> hw-accel-vlan code broke my use case, by delivering packets with
>>> priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.
>>>
>>
>> OK. But if we have decided to simulate the hw-accel-vlan-rx, I think
>> we'd better adjust the place where we put the emulation code. The very
>> beginnings of netif_rx() and neif_receive_skb() are better. Then rps
>> can support vlan packets without any change.
>
>That sounds nice. Patches are welcome.
>
>In principle it should be doable with some code motion.  I don't think
>moving vlan_untag earlier constitutes a bug fix.

I do not think that is doable. Consider multi tagged packets. The place
just after "another_round" takes care about that.

Btw what's the rationale to move untag to earlier position?

>
>In my investigation earlier I found a non-trivial number of paths into
>__netif_receive_skb.  So it was not clear to me in the slightest how to
>move the check earlier without modifying every networking driver and a
>few other pieces of code.
>
>Why should receive packet steering be affected by vlan tags at all?
>
>Eric
>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23  9:00                             ` Eric W. Biederman
@ 2011-05-23 16:33                               ` Ben Greear
  2011-05-23 19:36                                 ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-23 16:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Jiri Pirko, Nicolas de Pesloüan, Changli Gao,
	netdev, shemminger, kaber, fubar, eric.dumazet, andy,
	Jesse Gross

On 05/23/2011 02:00 AM, Eric W. Biederman wrote:
> Ben Greear<greearb@candelatech.com>  writes:
>
>> I believe we have been getting tagged VLAN packets properly
>> in our test cases.  We would not be creating any VLAN devices
>> in this case, so perhaps the NIC isn't doing any stripping.
>>
>> To me, it seems like we should get the fully tagged packet
>> without having to go muck with aux-data, though it would
>> be fine if it were *also* in aux-data.
>
> Given that pf_packet is a portable interface that works on multiple OS's
> I tend to agree.  Certainly my users would be happier if they don't
> have to change their code and not having to change tcpdump would
> also be nice.
>
> I'm not certain exactly where in the code it makes sense to put the
> vlan header back on for pf_packet sockets.  The simplest thing would
> be just before we run the socket filter.  If we don't do the simplest
> thing this raises the question how do we avoid breaking socket filters
> that look at the packet data and know there is going to be a vlan
> header there.

That is going to be tricky, since the VLAN header would adjust
offsets and users could be using some filter that uses offsets
with no actual mention of VLANs (but expecting it to take
the VLAN header into account).

> Still the current situation is better than seeing vlan 0 tagged packets
> twice.
>
> My gut feel says if we can cheaply get the socket filters to act like it
> sees the vlan tag (where the vlan tag belongs) we should not actually
> put the vlan tag back on until we copy the packet to userspace.

Maybe keep a count of how many sockets with filters and/or pf_packet
sockets are open, and how many things are registered in
the 'ptype_all' chain, and only re-add (or never remove) the header if that is > 0?

(And, let the bridging and other kernel logic deal with vlans
via auxillary methods as well as checking in-line headers.)

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 16:33                               ` Ben Greear
@ 2011-05-23 19:36                                 ` Nicolas de Pesloüan
  2011-05-23 20:24                                   ` Ben Greear
  0 siblings, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-23 19:36 UTC (permalink / raw)
  To: Ben Greear
  Cc: Eric W. Biederman, David Miller, Jiri Pirko, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Le 23/05/2011 18:33, Ben Greear a écrit :
> On 05/23/2011 02:00 AM, Eric W. Biederman wrote:
>> Ben Greear<greearb@candelatech.com> writes:
>>
>>> I believe we have been getting tagged VLAN packets properly
>>> in our test cases. We would not be creating any VLAN devices
>>> in this case, so perhaps the NIC isn't doing any stripping.
>>>
>>> To me, it seems like we should get the fully tagged packet
>>> without having to go muck with aux-data, though it would
>>> be fine if it were *also* in aux-data.
>>
>> Given that pf_packet is a portable interface that works on multiple OS's
>> I tend to agree. Certainly my users would be happier if they don't
>> have to change their code and not having to change tcpdump would
>> also be nice.
>>
>> I'm not certain exactly where in the code it makes sense to put the
>> vlan header back on for pf_packet sockets. The simplest thing would
>> be just before we run the socket filter. If we don't do the simplest
>> thing this raises the question how do we avoid breaking socket filters
>> that look at the packet data and know there is going to be a vlan
>> header there.
>
> That is going to be tricky, since the VLAN header would adjust
> offsets and users could be using some filter that uses offsets
> with no actual mention of VLANs (but expecting it to take
> the VLAN header into account).
>
>> Still the current situation is better than seeing vlan 0 tagged packets
>> twice.
>>
>> My gut feel says if we can cheaply get the socket filters to act like it
>> sees the vlan tag (where the vlan tag belongs) we should not actually
>> put the vlan tag back on until we copy the packet to userspace.
>
> Maybe keep a count of how many sockets with filters and/or pf_packet
> sockets are open, and how many things are registered in
> the 'ptype_all' chain, and only re-add (or never remove) the header if that is > 0?
>
> (And, let the bridging and other kernel logic deal with vlans
> via auxillary methods as well as checking in-line headers.)

Well, this doesn't sound very different from my previous proposal: if a protocol handler is 
registered at parent interface level, can't we simply assume this protocol handler expect the raw 
packet?

Also, I think the main problem is that ptype_all and ptype_base deliveries are different in 
__netif_receive_skb, for no good reason (at least, to my knowledge).

In http://patchwork.ozlabs.org/patch/85578/, I proposed to process protocol handlers that are 
registered on a particular interface inside the loop, then process protocol handlers registered to 
NULL upon exit from the loop.

That way, we would have the opportunity to deliver different skb (tagged/untagged) to different 
level of interface, while we cross the interface stack.

I know there is a possible performance penalty in this implementation, but this can be fixed by 
properly ordering the ptype_all/ptype_base list.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 10:43                                     ` Jiri Pirko
@ 2011-05-23 19:48                                       ` Nicolas de Pesloüan
  2011-05-24  5:58                                         ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-23 19:48 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Eric W. Biederman, Changli Gao, Ben Greear, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Le 23/05/2011 12:43, Jiri Pirko a écrit :
> Mon, May 23, 2011 at 11:41:22AM CEST, ebiederm@xmission.com wrote:
>> Changli Gao<xiaosuo@gmail.com>  writes:
>>
>>> On Mon, May 23, 2011 at 9:45 AM, Eric W. Biederman
>>> <ebiederm@xmission.com>  wrote:
>>>>> In another side, is there a specification which defines the
>>>>> hw-accel-vlan-rx?
>>>>
>>>> I don't know.
>>>>
>>>> I have just been trying to clean up the mess since some of the
>>>> hw-accel-vlan code broke my use case, by delivering packets with
>>>> priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.
>>>>
>>>
>>> OK. But if we have decided to simulate the hw-accel-vlan-rx, I think
>>> we'd better adjust the place where we put the emulation code. The very
>>> beginnings of netif_rx() and neif_receive_skb() are better. Then rps
>>> can support vlan packets without any change.
>>
>> That sounds nice. Patches are welcome.
>>
>> In principle it should be doable with some code motion.  I don't think
>> moving vlan_untag earlier constitutes a bug fix.
>
> I do not think that is doable. Consider multi tagged packets. The place
> just after "another_round" takes care about that.
>
> Btw what's the rationale to move untag to earlier position?

Maybe simply because we try to mimic hw-accel, and hw-accel untagging definitely happens before we 
enter __netif_receive_skb and only happens once.

So having software untagging inside the __netif_receive_skb loop looks different.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 19:36                                 ` Nicolas de Pesloüan
@ 2011-05-23 20:24                                   ` Ben Greear
  2011-05-23 21:00                                     ` Stephen Hemminger
  0 siblings, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-23 20:24 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Eric W. Biederman, David Miller, Jiri Pirko, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

On 05/23/2011 12:36 PM, Nicolas de Pesloüan wrote:
> Le 23/05/2011 18:33, Ben Greear a écrit :
>> On 05/23/2011 02:00 AM, Eric W. Biederman wrote:
>>> Ben Greear<greearb@candelatech.com> writes:
>>>
>>>> I believe we have been getting tagged VLAN packets properly
>>>> in our test cases. We would not be creating any VLAN devices
>>>> in this case, so perhaps the NIC isn't doing any stripping.
>>>>
>>>> To me, it seems like we should get the fully tagged packet
>>>> without having to go muck with aux-data, though it would
>>>> be fine if it were *also* in aux-data.
>>>
>>> Given that pf_packet is a portable interface that works on multiple OS's
>>> I tend to agree. Certainly my users would be happier if they don't
>>> have to change their code and not having to change tcpdump would
>>> also be nice.
>>>
>>> I'm not certain exactly where in the code it makes sense to put the
>>> vlan header back on for pf_packet sockets. The simplest thing would
>>> be just before we run the socket filter. If we don't do the simplest
>>> thing this raises the question how do we avoid breaking socket filters
>>> that look at the packet data and know there is going to be a vlan
>>> header there.
>>
>> That is going to be tricky, since the VLAN header would adjust
>> offsets and users could be using some filter that uses offsets
>> with no actual mention of VLANs (but expecting it to take
>> the VLAN header into account).
>>
>>> Still the current situation is better than seeing vlan 0 tagged packets
>>> twice.
>>>
>>> My gut feel says if we can cheaply get the socket filters to act like it
>>> sees the vlan tag (where the vlan tag belongs) we should not actually
>>> put the vlan tag back on until we copy the packet to userspace.
>>
>> Maybe keep a count of how many sockets with filters and/or pf_packet
>> sockets are open, and how many things are registered in
>> the 'ptype_all' chain, and only re-add (or never remove) the header if
>> that is > 0?
>>
>> (And, let the bridging and other kernel logic deal with vlans
>> via auxillary methods as well as checking in-line headers.)
>
> Well, this doesn't sound very different from my previous proposal: if a
> protocol handler is registered at parent interface level, can't we
> simply assume this protocol handler expect the raw packet?

Well, yes..unless perhaps when the REORDER_HDR flag is enabled.

As for when the tag is added/removed, as long as we don't end up doing
a remove and then an add (in software), and as long as the pkt is correct
going to user-space, it doesn't matter to me.

It would be lame to remove it in software and then re-add it,
unless absolutely required for some reason.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 20:24                                   ` Ben Greear
@ 2011-05-23 21:00                                     ` Stephen Hemminger
  2011-05-23 21:20                                       ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Stephen Hemminger @ 2011-05-23 21:00 UTC (permalink / raw)
  To: Ben Greear
  Cc: Nicolas de Pesloüan, Eric W. Biederman, David Miller,
	Jiri Pirko, Changli Gao, netdev, kaber, fubar, eric.dumazet,
	andy, Jesse Gross


> Well, yes..unless perhaps when the REORDER_HDR flag is enabled.
> 
> As for when the tag is added/removed, as long as we don't end up doing
> a remove and then an add (in software), and as long as the pkt is correct
> going to user-space, it doesn't matter to me.
> 
> It would be lame to remove it in software and then re-add it,
> unless absolutely required for some reason.

IMHO the REORDER_HDR flag was a design mistake. It means supporting
two different API's for the application. For a packet capture application
it means the format of the packet will be different based on how
the VLAN was created. And the vlan was created outside the application.

If there was a requirement to support both formats, then it should
be a property of the AF_PACKET socket.  The application can then do
an setsockopt() to control the format of the data. The issue is
for things like mmap/AF_PACKET the re-inserted form will be slower
since data has to be copied.



^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 21:00                                     ` Stephen Hemminger
@ 2011-05-23 21:20                                       ` David Miller
  2011-05-23 22:05                                         ` Eric W. Biederman
  2011-05-24  0:11                                         ` [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check Eric W. Biederman
  0 siblings, 2 replies; 92+ messages in thread
From: David Miller @ 2011-05-23 21:20 UTC (permalink / raw)
  To: shemminger
  Cc: greearb, nicolas.2p.debian, ebiederm, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 23 May 2011 14:00:48 -0700

> IMHO the REORDER_HDR flag was a design mistake. It means supporting
> two different API's for the application. For a packet capture application
> it means the format of the packet will be different based on how
> the VLAN was created. And the vlan was created outside the application.
> 
> If there was a requirement to support both formats, then it should
> be a property of the AF_PACKET socket.  The application can then do
> an setsockopt() to control the format of the data. The issue is
> for things like mmap/AF_PACKET the re-inserted form will be slower
> since data has to be copied.

Indeed, it was a foolish thing to support in the first place.

I guess we couldn't see the hw offloads in the future at that
point.

I'm all for finding a way to deprecate this over time.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 21:20                                       ` David Miller
@ 2011-05-23 22:05                                         ` Eric W. Biederman
  2011-05-23 22:16                                           ` Stephen Hemminger
                                                             ` (2 more replies)
  2011-05-24  0:11                                         ` [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check Eric W. Biederman
  1 sibling, 3 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23 22:05 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

David Miller <davem@davemloft.net> writes:

> From: Stephen Hemminger <shemminger@linux-foundation.org>
> Date: Mon, 23 May 2011 14:00:48 -0700
>
>> IMHO the REORDER_HDR flag was a design mistake. It means supporting
>> two different API's for the application. For a packet capture application
>> it means the format of the packet will be different based on how
>> the VLAN was created. And the vlan was created outside the application.
>> 
>> If there was a requirement to support both formats, then it should
>> be a property of the AF_PACKET socket.  The application can then do
>> an setsockopt() to control the format of the data. The issue is
>> for things like mmap/AF_PACKET the re-inserted form will be slower
>> since data has to be copied.
>
> Indeed, it was a foolish thing to support in the first place.
>
> I guess we couldn't see the hw offloads in the future at that
> point.
>
> I'm all for finding a way to deprecate this over time.


There are two very similar issues here, that affect two
slightly different cases.

Let's assume the configuration is:
eth0 - Talks to the outside world.
vlan2000 - Is the vlan interface of interest.


With REORDER_HDR set when I tcpdump on vlan2000 I don't see the
vlan header, however I continue to see the vlan header when I tcpdump on eth0.

With vlan hardware acceleration.  When I tcpdump on eth0 I don't
see the vlan header.  Nor do I see the vlan header when I tcpdump
on vlan2000.


Right now both cases are problematic in Linus's tree.

For inbound packets We are testing the wrong interface for the to see if
we should play with REORDER_HDR.

Hardware acceleration vlan tagging we are hiding the vlan header from
portable applications that simply dump eth0.  Which is non-intuitive
and apparent to everyone now that this happens on both software and
hardware interfaces.


So we have a couple of questions.
1) Do we revert the software emulation of vlan header tagging to fix
   it's implementation issues in 2.6.40?

   The big issues.
   - vlan_untag is currently reading undefined data.
   - Clearing REORDER_HDR no longer works.

   I expect the issues are small enough that we can address them now.

2) Do we instead move the software implementation of vlan header
   acceleration back into the net-next.

3) What do we do with pf_packet and vlan hardware acceleration when
   dumping not the vlan interface but the interface below the vlan
   interface?

   Do we provide an option to keep the vlan header?  Should that option
   be on by default?

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 22:05                                         ` Eric W. Biederman
@ 2011-05-23 22:16                                           ` Stephen Hemminger
  2011-05-23 22:48                                             ` Eric W. Biederman
  2011-05-23 22:23                                           ` Ben Greear
  2011-05-24  5:19                                           ` David Miller
  2 siblings, 1 reply; 92+ messages in thread
From: Stephen Hemminger @ 2011-05-23 22:16 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, greearb, nicolas.2p.debian, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On Mon, 23 May 2011 15:05:54 -0700
ebiederm@xmission.com (Eric W. Biederman) wrote:

> David Miller <davem@davemloft.net> writes:
> 
> > From: Stephen Hemminger <shemminger@linux-foundation.org>
> > Date: Mon, 23 May 2011 14:00:48 -0700
> >
> >> IMHO the REORDER_HDR flag was a design mistake. It means supporting
> >> two different API's for the application. For a packet capture application
> >> it means the format of the packet will be different based on how
> >> the VLAN was created. And the vlan was created outside the application.
> >> 
> >> If there was a requirement to support both formats, then it should
> >> be a property of the AF_PACKET socket.  The application can then do
> >> an setsockopt() to control the format of the data. The issue is
> >> for things like mmap/AF_PACKET the re-inserted form will be slower
> >> since data has to be copied.
> >
> > Indeed, it was a foolish thing to support in the first place.
> >
> > I guess we couldn't see the hw offloads in the future at that
> > point.
> >
> > I'm all for finding a way to deprecate this over time.
> 
> 
> There are two very similar issues here, that affect two
> slightly different cases.
> 
> Let's assume the configuration is:
> eth0 - Talks to the outside world.
> vlan2000 - Is the vlan interface of interest.
> 
> 
> With REORDER_HDR set when I tcpdump on vlan2000 I don't see the
> vlan header, however I continue to see the vlan header when I tcpdump on eth0.
> 
> With vlan hardware acceleration.  When I tcpdump on eth0 I don't
> see the vlan header.  Nor do I see the vlan header when I tcpdump
> on vlan2000.
> 
> 
> Right now both cases are problematic in Linus's tree.
> 
> For inbound packets We are testing the wrong interface for the to see if
> we should play with REORDER_HDR.
> 
> Hardware acceleration vlan tagging we are hiding the vlan header from
> portable applications that simply dump eth0.  Which is non-intuitive
> and apparent to everyone now that this happens on both software and
> hardware interfaces.
> 
> 
> So we have a couple of questions.
> 1) Do we revert the software emulation of vlan header tagging to fix
>    it's implementation issues in 2.6.40?
> 
>    The big issues.
>    - vlan_untag is currently reading undefined data.
>    - Clearing REORDER_HDR no longer works.
> 
>    I expect the issues are small enough that we can address them now.
> 
> 2) Do we instead move the software implementation of vlan header
>    acceleration back into the net-next.
> 
> 3) What do we do with pf_packet and vlan hardware acceleration when
>    dumping not the vlan interface but the interface below the vlan
>    interface?
> 
>    Do we provide an option to keep the vlan header?  Should that option
>    be on by default?

Why not just make libpcap smarter? it already has Linux specific
stuff for checksum offload. The portability to BSD for AF_PACKET is
a non-issue.


-- 

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 22:05                                         ` Eric W. Biederman
  2011-05-23 22:16                                           ` Stephen Hemminger
@ 2011-05-23 22:23                                           ` Ben Greear
  2011-05-23 23:02                                             ` Eric W. Biederman
  2011-05-24  5:19                                           ` David Miller
  2 siblings, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-23 22:23 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, nicolas.2p.debian, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On 05/23/2011 03:05 PM, Eric W. Biederman wrote:
> David Miller<davem@davemloft.net>  writes:
>
>> From: Stephen Hemminger<shemminger@linux-foundation.org>
>> Date: Mon, 23 May 2011 14:00:48 -0700
>>
>>> IMHO the REORDER_HDR flag was a design mistake. It means supporting
>>> two different API's for the application. For a packet capture application
>>> it means the format of the packet will be different based on how
>>> the VLAN was created. And the vlan was created outside the application.
>>>
>>> If there was a requirement to support both formats, then it should
>>> be a property of the AF_PACKET socket.  The application can then do
>>> an setsockopt() to control the format of the data. The issue is
>>> for things like mmap/AF_PACKET the re-inserted form will be slower
>>> since data has to be copied.
>>
>> Indeed, it was a foolish thing to support in the first place.
>>
>> I guess we couldn't see the hw offloads in the future at that
>> point.
>>
>> I'm all for finding a way to deprecate this over time.
>
>
> There are two very similar issues here, that affect two
> slightly different cases.
>
> Let's assume the configuration is:
> eth0 - Talks to the outside world.
> vlan2000 - Is the vlan interface of interest.
>
>
> With REORDER_HDR set when I tcpdump on vlan2000 I don't see the
> vlan header, however I continue to see the vlan header when I tcpdump on eth0.

That seems good.  This is what originally let dhcp function.  Not sure
if it's still required for dhcp, but there could easily be other programs that
have similar issues.

With REORDER_HDR off, we should see vlan tagged packets on both eth0
and vlan2000.

If REORDER_HDR dissappears entirely, I think you have to default to
stripping the header on vlan2000.

>
> With vlan hardware acceleration.  When I tcpdump on eth0 I don't
> see the vlan header.  Nor do I see the vlan header when I tcpdump
> on vlan2000.

I think you should see the header on eth0 regardless of hw acceleration
or not.  Users should not have to know if their NIC/driver supports
vlan tag stripping in one mode or another.


> Right now both cases are problematic in Linus's tree.
>
> For inbound packets We are testing the wrong interface for the to see if
> we should play with REORDER_HDR.
>
> Hardware acceleration vlan tagging we are hiding the vlan header from
> portable applications that simply dump eth0.  Which is non-intuitive
> and apparent to everyone now that this happens on both software and
> hardware interfaces.
>
>
> So we have a couple of questions.
> 1) Do we revert the software emulation of vlan header tagging to fix
>     it's implementation issues in 2.6.40?
>
>     The big issues.
>     - vlan_untag is currently reading undefined data.
>     - Clearing REORDER_HDR no longer works.
>
>     I expect the issues are small enough that we can address them now.
>
> 2) Do we instead move the software implementation of vlan header
>     acceleration back into the net-next.
>
> 3) What do we do with pf_packet and vlan hardware acceleration when
>     dumping not the vlan interface but the interface below the vlan
>     interface?
>
>     Do we provide an option to keep the vlan header?  Should that option
>     be on by default?

At the least we need to have the header kept when using pf_packet on eth0.

I think it's best to have it available on vlan2000, but perhaps have it
stripped by default for backwards compatibility.

Thanks,
Ben


>
> Eric


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 22:16                                           ` Stephen Hemminger
@ 2011-05-23 22:48                                             ` Eric W. Biederman
  0 siblings, 0 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23 22:48 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, greearb, nicolas.2p.debian, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Stephen Hemminger <shemminger@linux-foundation.org> writes:

> On Mon, 23 May 2011 15:05:54 -0700
> ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> David Miller <davem@davemloft.net> writes:
>> 
>> > From: Stephen Hemminger <shemminger@linux-foundation.org>
>> > Date: Mon, 23 May 2011 14:00:48 -0700
>> >
>> >> IMHO the REORDER_HDR flag was a design mistake. It means supporting
>> >> two different API's for the application. For a packet capture application
>> >> it means the format of the packet will be different based on how
>> >> the VLAN was created. And the vlan was created outside the application.
>> >> 
>> >> If there was a requirement to support both formats, then it should
>> >> be a property of the AF_PACKET socket.  The application can then do
>> >> an setsockopt() to control the format of the data. The issue is
>> >> for things like mmap/AF_PACKET the re-inserted form will be slower
>> >> since data has to be copied.
>> >
>> > Indeed, it was a foolish thing to support in the first place.
>> >
>> > I guess we couldn't see the hw offloads in the future at that
>> > point.
>> >
>> > I'm all for finding a way to deprecate this over time.
>> 
>> 
>> There are two very similar issues here, that affect two
>> slightly different cases.
>> 
>> Let's assume the configuration is:
>> eth0 - Talks to the outside world.
>> vlan2000 - Is the vlan interface of interest.
>> 
>> 
>> With REORDER_HDR set when I tcpdump on vlan2000 I don't see the
>> vlan header, however I continue to see the vlan header when I tcpdump on eth0.
>> 
>> With vlan hardware acceleration.  When I tcpdump on eth0 I don't
>> see the vlan header.  Nor do I see the vlan header when I tcpdump
>> on vlan2000.
>> 
>> 
>> Right now both cases are problematic in Linus's tree.
>> 
>> For inbound packets We are testing the wrong interface for the to see if
>> we should play with REORDER_HDR.
>> 
>> Hardware acceleration vlan tagging we are hiding the vlan header from
>> portable applications that simply dump eth0.  Which is non-intuitive
>> and apparent to everyone now that this happens on both software and
>> hardware interfaces.
>> 
>> 
>> So we have a couple of questions.
>> 1) Do we revert the software emulation of vlan header tagging to fix
>>    it's implementation issues in 2.6.40?
>> 
>>    The big issues.
>>    - vlan_untag is currently reading undefined data.
>>    - Clearing REORDER_HDR no longer works.
>> 
>>    I expect the issues are small enough that we can address them now.
>> 
>> 2) Do we instead move the software implementation of vlan header
>>    acceleration back into the net-next.
>> 
>> 3) What do we do with pf_packet and vlan hardware acceleration when
>>    dumping not the vlan interface but the interface below the vlan
>>    interface?
>> 
>>    Do we provide an option to keep the vlan header?  Should that option
>>    be on by default?
>
> Why not just make libpcap smarter? it already has Linux specific
> stuff for checksum offload. The portability to BSD for AF_PACKET is
> a non-issue.

That is not an option I had considered, and it sounds intriguing.

Even if libpcap can cope we are still not backwards compatible with
older linux applications.  Some of which work up to 2.6.39 because
the interface the vlan tagged packets come in on does not implement
vlan hardware acceleration.

There is also the issue that socket filters can't currently see
skb->tci so we are really messed up in the case we want vlan hardware
acceleration.

Eric




^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 22:23                                           ` Ben Greear
@ 2011-05-23 23:02                                             ` Eric W. Biederman
  2011-05-24  4:20                                               ` Ben Greear
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-23 23:02 UTC (permalink / raw)
  To: Ben Greear
  Cc: David Miller, shemminger, nicolas.2p.debian, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Ben Greear <greearb@candelatech.com> writes:

> On 05/23/2011 03:05 PM, Eric W. Biederman wrote:
>
> If REORDER_HDR dissappears entirely, I think you have to default to
> stripping the header on vlan2000.

Which is what patches that started this thread are doing.

>> With vlan hardware acceleration.  When I tcpdump on eth0 I don't
>> see the vlan header.  Nor do I see the vlan header when I tcpdump
>> on vlan2000.
>
> I think you should see the header on eth0 regardless of hw acceleration
> or not.  Users should not have to know if their NIC/driver supports
> vlan tag stripping in one mode or another.

But is it acceptable to fix this in libpcap?

>> 3) What do we do with pf_packet and vlan hardware acceleration when
>>     dumping not the vlan interface but the interface below the vlan
>>     interface?
>>
>>     Do we provide an option to keep the vlan header?  Should that option
>>     be on by default?
>
> At the least we need to have the header kept when using pf_packet on
> eth0.

Start with the assumption that vlan hardware acceleration is in place
and the hardware has stripped the vlan tag and put it in skb->tci.
Sure there are dumb divers out there that don't do this today but
either we need to throw out vlan hardware acceleration completely
or emulate it in software because otherwise the test matrix is just
too big.

> I think it's best to have it available on vlan2000, but perhaps have it
> stripped by default for backwards compatibility.

Anything that deals with raw packets pretty much breaks if you don't
strip the vlan header from visibility on vlan2000.  Plus you loose
any advantage there is from vlan hardware acceleration, which is
available on must modern NICs.  So I don't think we can seriously
consider having the vlan header for present on the vlan2000 device.

All that is interesting is what to do with eth0, and pf_packet sockets.
And the only question that seems really interesting there is do we put
the vlan header back on with libpcap magic or with pf_packet logic.

We have to start with a the assumption that we come in with a pf_packet
with the vlan tag only in skb->tci.

Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

* [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-23 21:20                                       ` David Miller
  2011-05-23 22:05                                         ` Eric W. Biederman
@ 2011-05-24  0:11                                         ` Eric W. Biederman
  2011-05-24  4:54                                           ` David Miller
  1 sibling, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-24  0:11 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse


Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but
rather in vlan_do_receive.  In attempting to test VLAN_FLAG_REORDER_HDR
we are attempting to deference vlan private network device members a non
vlan device, which is just wrong.  Move the test into vlan_do_receive so
that the vlan header will not be properly put on the packet in the case
of vlan header acceleration.

As we remove the check from vlan_check_reorder_header rename it
vlan_reorder_header to keep the naming clean.

Hopefully at somepoint we will just declare the case where
VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code.
Until then this keeps it working correctly.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/if_vlan.h |   25 ++++++++++++++++++++++---
 net/8021q/vlan_core.c   |   25 ++++++++++++++-----------
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 290bd8a..26f3c92 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -220,7 +220,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
 }
 
 /**
- * __vlan_put_tag - regular VLAN tag inserting
+ * vlan_insert_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_tci: VLAN TCI to insert
  *
@@ -229,8 +229,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
  *
  * Following the skb_unshare() example, in case of error, the calling function
  * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used for receive.
  */
-static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -250,8 +252,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
 
-	skb->protocol = htons(ETH_P_8021Q);
+	return skb;
+}
 
+/**
+ * __vlan_put_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ */
+static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+{
+	skb = vlan_insert_tag(skb, vlan_tci);
+	if (skb)
+		skb->protocol = htons(ETH_P_8021Q);
 	return skb;
 }
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc..c08dae7 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -23,6 +23,11 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		return false;
 
 	skb->dev = vlan_dev;
+	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		if (!skb)
+			return false;
+	}
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
@@ -89,17 +94,15 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_frags);
 
-static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		skb = NULL;
+	if (skb) {
+		/* Lifted from Gleb's VLAN code... */
+		memmove(skb->data - ETH_HLEN,
+			skb->data - VLAN_ETH_HLEN, 12);
+		skb->mac_header += VLAN_HLEN;
 	}
 	return skb;
 }
@@ -161,7 +164,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
+	skb = vlan_reorder_header(skb);
 	if (unlikely(!skb))
 		goto err_free;
 
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 23:02                                             ` Eric W. Biederman
@ 2011-05-24  4:20                                               ` Ben Greear
  2011-05-24  7:11                                                 ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Ben Greear @ 2011-05-24  4:20 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, nicolas.2p.debian, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On 05/23/2011 04:02 PM, Eric W. Biederman wrote:
> Ben Greear<greearb@candelatech.com>  writes:
>
>> On 05/23/2011 03:05 PM, Eric W. Biederman wrote:
>>
>> If REORDER_HDR dissappears entirely, I think you have to default to
>> stripping the header on vlan2000.
>
> Which is what patches that started this thread are doing.
>
>>> With vlan hardware acceleration.  When I tcpdump on eth0 I don't
>>> see the vlan header.  Nor do I see the vlan header when I tcpdump
>>> on vlan2000.
>>
>> I think you should see the header on eth0 regardless of hw acceleration
>> or not.  Users should not have to know if their NIC/driver supports
>> vlan tag stripping in one mode or another.
>
> But is it acceptable to fix this in libpcap?

No, because libpcap is not the only thing that opens packet
sockets.  Our user-space network emulator bridges two ethernet
interfaces using packet-sockets.  We have to have the VLAN header
available to properly bridge a VLAN packet out the other side.
It would be possible to use some aux data, but I think that is
a very nasty hack.

Surely we and libpcap are not the only users...

Also, anything using a packet filter might need the header in place.

>>> 3) What do we do with pf_packet and vlan hardware acceleration when
>>>      dumping not the vlan interface but the interface below the vlan
>>>      interface?
>>>
>>>      Do we provide an option to keep the vlan header?  Should that option
>>>      be on by default?
>>
>> At the least we need to have the header kept when using pf_packet on
>> eth0.
>
> Start with the assumption that vlan hardware acceleration is in place
> and the hardware has stripped the vlan tag and put it in skb->tci.
> Sure there are dumb divers out there that don't do this today but
> either we need to throw out vlan hardware acceleration completely
> or emulate it in software because otherwise the test matrix is just
> too big.

It's a binary case..either tag exists in skb or it doesn't.  It might double
the test cases, but that is still a reasonable number.  We should be able
to write a test app to mostly automate testing for that matter.

I still haven't had time to do detailed testing, but I *believe* that
even smart drivers like e1000e and igb don't strip tags if you do not
have any VLANs created on the NIC.  So, sometimes you get tags on
eth0 even when using fancy NICs.

>
>> I think it's best to have it available on vlan2000, but perhaps have it
>> stripped by default for backwards compatibility.
>
> Anything that deals with raw packets pretty much breaks if you don't
> strip the vlan header from visibility on vlan2000.  Plus you loose
> any advantage there is from vlan hardware acceleration, which is
> available on must modern NICs.  So I don't think we can seriously
> consider having the vlan header for present on the vlan2000 device.

I'm fine with stripping them from the vlan2000 frame, though
hopefully one day we can optimize to only do this if there
are actual consumers of the raw packet in user-space.

> All that is interesting is what to do with eth0, and pf_packet sockets.
> And the only question that seems really interesting there is do we put
> the vlan header back on with libpcap magic or with pf_packet logic.
>
> We have to start with a the assumption that we come in with a pf_packet
> with the vlan tag only in skb->tci.

Then I think we should put it back with pf_packet logic.  Possibly with
a per-socket option to disable this and send it as only aux data if that
is more efficient.

If it turns out the NIC is not stripping VLAN tags for whatever reason,
we might be able to optimize things so that it never does the HW emulation
so that it never has to un-do it later.

If we can agree on the desired behaviour, I'll put some effort into
a test system that can test a 4-port system:

eth0 { loopback cable }  eth1 { pf-socket based software bridge } eth2 { looped-back-cable } eth3

Could iterate through a matrix of vlans in various places and test each time to make sure
eth0 and eth3 receive expected data.  I could split the sender/consumer logic into one process
and the bridge into another so that it could be done on two systems with 2 ports each.

Thanks,
Ben

>
> Eric


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-24  0:11                                         ` [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check Eric W. Biederman
@ 2011-05-24  4:54                                           ` David Miller
  2011-05-24  6:18                                             ` Eric W. Biederman
  0 siblings, 1 reply; 92+ messages in thread
From: David Miller @ 2011-05-24  4:54 UTC (permalink / raw)
  To: ebiederm
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 23 May 2011 17:11:51 -0700

> diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
> index 41495dc..c08dae7 100644
> --- a/net/8021q/vlan_core.c
> +++ b/net/8021q/vlan_core.c
> @@ -23,6 +23,11 @@ bool vlan_do_receive(struct sk_buff **skbp)
>  		return false;
>  
>  	skb->dev = vlan_dev;
> +	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
> +		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
> +		if (!skb)
> +			return false;
> +	}
>  	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
>  	skb->vlan_tci = 0;
>  

Below we do a eth_hdr(skb) based check on the ethernet address in the
PACKET_OTHERHOST case.

Won't this VLAN tag insertion change skb->mac_header and thus screw up
that test?

Touching this code requires surgical precision and long audits, because
there are so many subtble dependencies all over the place like this.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 22:05                                         ` Eric W. Biederman
  2011-05-23 22:16                                           ` Stephen Hemminger
  2011-05-23 22:23                                           ` Ben Greear
@ 2011-05-24  5:19                                           ` David Miller
  2011-05-24  7:56                                             ` Eric W. Biederman
  2011-05-24 15:44                                             ` Ben Greear
  2 siblings, 2 replies; 92+ messages in thread
From: David Miller @ 2011-05-24  5:19 UTC (permalink / raw)
  To: ebiederm
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 23 May 2011 15:05:54 -0700

> 3) What do we do with pf_packet and vlan hardware acceleration when
>    dumping not the vlan interface but the interface below the vlan
>    interface?
> 
>    Do we provide an option to keep the vlan header?  Should that option
>    be on by default?
> 

The vlan_tci in the V2 pf_packet auxdata was intended for this
purpose.

So no matter what variant of behavior is occurring, apps can properly
reconstitute the VLAN header if they inspect the vlan_tci in the
auxdata.

The only thing that seems to be missing is an indication that a VLAN
tag was present at all, ie. vlan_tx_tag_present(), in this manner an
application could then differentiate between no VLAN header and a VLAN
tag of zero.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-23 19:48                                       ` Nicolas de Pesloüan
@ 2011-05-24  5:58                                         ` Jiri Pirko
  2011-05-24  7:19                                           ` Nicolas de Pesloüan
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-05-24  5:58 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Eric W. Biederman, Changli Gao, Ben Greear, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Mon, May 23, 2011 at 09:48:12PM CEST, nicolas.2p.debian@gmail.com wrote:
>Le 23/05/2011 12:43, Jiri Pirko a écrit :
>>Mon, May 23, 2011 at 11:41:22AM CEST, ebiederm@xmission.com wrote:
>>>Changli Gao<xiaosuo@gmail.com>  writes:
>>>
>>>>On Mon, May 23, 2011 at 9:45 AM, Eric W. Biederman
>>>><ebiederm@xmission.com>  wrote:
>>>>>>In another side, is there a specification which defines the
>>>>>>hw-accel-vlan-rx?
>>>>>
>>>>>I don't know.
>>>>>
>>>>>I have just been trying to clean up the mess since some of the
>>>>>hw-accel-vlan code broke my use case, by delivering packets with
>>>>>priority but no vlan (aka vlan 0 packets) twice to my pf_packet sockets.
>>>>>
>>>>
>>>>OK. But if we have decided to simulate the hw-accel-vlan-rx, I think
>>>>we'd better adjust the place where we put the emulation code. The very
>>>>beginnings of netif_rx() and neif_receive_skb() are better. Then rps
>>>>can support vlan packets without any change.
>>>
>>>That sounds nice. Patches are welcome.
>>>
>>>In principle it should be doable with some code motion.  I don't think
>>>moving vlan_untag earlier constitutes a bug fix.
>>
>>I do not think that is doable. Consider multi tagged packets. The place
>>just after "another_round" takes care about that.
>>
>>Btw what's the rationale to move untag to earlier position?
>
>Maybe simply because we try to mimic hw-accel, and hw-accel untagging
>definitely happens before we enter __netif_receive_skb and only
>happens once.
>
>So having software untagging inside the __netif_receive_skb loop looks different.

I understand. But what code prior to current vlan_untag position needs
to see the skb untagged?

>
>	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-24  4:54                                           ` David Miller
@ 2011-05-24  6:18                                             ` Eric W. Biederman
  2011-05-24  6:24                                               ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-24  6:18 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

David Miller <davem@davemloft.net> writes:

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 23 May 2011 17:11:51 -0700
>
>> diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
>> index 41495dc..c08dae7 100644
>> --- a/net/8021q/vlan_core.c
>> +++ b/net/8021q/vlan_core.c
>> @@ -23,6 +23,11 @@ bool vlan_do_receive(struct sk_buff **skbp)
>>  		return false;
>>  
>>  	skb->dev = vlan_dev;
>> +	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
>> +		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
>> +		if (!skb)
>> +			return false;
>> +	}
>>  	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
>>  	skb->vlan_tci = 0;
>>  
>
> Below we do a eth_hdr(skb) based check on the ethernet address in the
> PACKET_OTHERHOST case.
>
> Won't this VLAN tag insertion change skb->mac_header and thus screw up
> that test?

Yes the vlan tag insertion does in fact change the skb->mac_header
index.  However we also move the location of the data as well, so
I fail to see any reason to be concerned about the PACKET_OTHERHOST
case.  Things were moved around and we updated the appropriate references.

Feel free to read through the code, to convince yourself it is correct.
In addition the code is untouched from the vlan header insertion for
emulation of vlan header acceleration in dev_hard_start_xmit() which
presumably has been working for quite awhile.

> Touching this code requires surgical precision and long audits, because
> there are so many subtble dependencies all over the place like this.

Certainly.

I think you will find that I have applied great precision and restraint
in this case. 

Eric



^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-24  6:18                                             ` Eric W. Biederman
@ 2011-05-24  6:24                                               ` David Miller
  2011-05-24  7:38                                                 ` Eric W. Biederman
  0 siblings, 1 reply; 92+ messages in thread
From: David Miller @ 2011-05-24  6:24 UTC (permalink / raw)
  To: ebiederm
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 23 May 2011 23:18:02 -0700

> Feel free to read through the code, to convince yourself it is correct.
> In addition the code is untouched from the vlan header insertion for
> emulation of vlan header acceleration in dev_hard_start_xmit() which
> presumably has been working for quite awhile.

I'm not keeping code there that does eth_hdr(skb)->foo when there
can be either a vlan_hdr(skb) or a eth_hdr(skb) there.

That's just asking for trouble.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  4:20                                               ` Ben Greear
@ 2011-05-24  7:11                                                 ` Nicolas de Pesloüan
  2011-05-24  7:44                                                   ` Michał Mirosław
  2011-05-24 15:17                                                   ` Ben Greear
  0 siblings, 2 replies; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-24  7:11 UTC (permalink / raw)
  To: Ben Greear
  Cc: Eric W. Biederman, David Miller, shemminger, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Le 24/05/2011 06:20, Ben Greear a écrit :
<snip>

> Then I think we should put it back with pf_packet logic. Possibly with
> a per-socket option to disable this and send it as only aux data if that
> is more efficient.

Why would we need a per-socket option for that?

If the socket listen on eth0, it probably expect to receive the raw packet. If it happens to expect 
the untagged packet, it should listen on vlan2000.

The only reason I can imagine to listen on eth0 while expecting the packet to be untagged is to 
listen to several VLAN at the same time on a single socket, while still expecting the kernel or the 
NIC to extract the vlan ID for us. But I don't have a real life use case for this in mind.

And maybe, for that particular situation, we should have a special vlan interface, with wildcard 
vlan ID:

        +--eth0.100
        |
eth0 --+--eth0.200
        |
        +--eth0.any

- Someone listening on eth0 would receive raw packet.
- Someone listening on eth0.100/eth0.200 would receive untagged packets originally having 100/200 as 
the VLAN ID.
- Someone listening on eth0.any would receive untagged packets originally having any VLAN ID and 
would not receive non-originally-tagged packets.

> If it turns out the NIC is not stripping VLAN tags for whatever reason,
> we might be able to optimize things so that it never does the HW emulation
> so that it never has to un-do it later.

It might be very tricky to avoid any do-undo-redo situation. I will latter try and describe all the 
possible situations: with/without hw-accel, with/without a protocol handler registered on the parent 
interface, with/without a child interface build on top of a particular parent interface and with the 
corresponding VLAN ID...

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  5:58                                         ` Jiri Pirko
@ 2011-05-24  7:19                                           ` Nicolas de Pesloüan
  0 siblings, 0 replies; 92+ messages in thread
From: Nicolas de Pesloüan @ 2011-05-24  7:19 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Eric W. Biederman, Changli Gao, Ben Greear, David Miller, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Le 24/05/2011 07:58, Jiri Pirko a écrit :
<snip>

>>> Btw what's the rationale to move untag to earlier position?
>>
>> Maybe simply because we try to mimic hw-accel, and hw-accel untagging
>> definitely happens before we enter __netif_receive_skb and only
>> happens once.
>>
>> So having software untagging inside the __netif_receive_skb loop looks different.
>
> I understand. But what code prior to current vlan_untag position needs
> to see the skb untagged?

Any protocol handlers (ptype_all or ptype_base) registered on the parent interface may need to see 
the skb untagged, for all the reasons given in this thread. Arguably, doing software untagging 
earlier wouldn't help. :-) We need a strong logic to decide whether and when to untag and/or 
possibly re-tag.

	Nicolas.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-24  6:24                                               ` David Miller
@ 2011-05-24  7:38                                                 ` Eric W. Biederman
  2011-06-02  3:59                                                   ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-24  7:38 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

David Miller <davem@davemloft.net> writes:

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 23 May 2011 23:18:02 -0700
>
>> Feel free to read through the code, to convince yourself it is correct.
>> In addition the code is untouched from the vlan header insertion for
>> emulation of vlan header acceleration in dev_hard_start_xmit() which
>> presumably has been working for quite awhile.
>
> I'm not keeping code there that does eth_hdr(skb)->foo when there
> can be either a vlan_hdr(skb) or a eth_hdr(skb) there.
>
> That's just asking for trouble.

How so?  eth_hdr(skb) is a proper subset of vlan_hdr(skb)?

vlan_insert_tag() moves the ethernet header to make space for the vlan
header after it, but before the rest of the data.  With the appropriate
fixups applied to the skb->mac_pointer.

I can see not leaving a vlan_hdr(skb) reference, but beyond that I'm
not seeing the problem.

Certainly we need to do the insert before we update the statics or
we will get rx_bytes wrong.

Would you be happier if the pkt_type check was moved earlier in the
function like say:

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c08dae7..7571637 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -22,21 +22,6 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	if (unlikely(!skb))
 		return false;
 
-	skb->dev = vlan_dev;
-	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
-		if (!skb)
-			return false;
-	}
-	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
-	skb->vlan_tci = 0;
-
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
-
-	u64_stats_update_begin(&rx_stats->syncp);
-	rx_stats->rx_packets++;
-	rx_stats->rx_bytes += skb->len;
-
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST:
 		break;
@@ -52,6 +37,21 @@ bool vlan_do_receive(struct sk_buff **skbp)
 			skb->pkt_type = PACKET_HOST;
 		break;
 	}
+
+	skb->dev = vlan_dev;
+	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		if (!skb)
+			return false;
+	}
+	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
+	skb->vlan_tci = 0;
+
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
+
+	u64_stats_update_begin(&rx_stats->syncp);
+	rx_stats->rx_packets++;
+	rx_stats->rx_bytes += skb->len;
 	u64_stats_update_end(&rx_stats->syncp);
 
 	return true;

Eric

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  7:11                                                 ` Nicolas de Pesloüan
@ 2011-05-24  7:44                                                   ` Michał Mirosław
  2011-05-24 15:17                                                   ` Ben Greear
  1 sibling, 0 replies; 92+ messages in thread
From: Michał Mirosław @ 2011-05-24  7:44 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Ben Greear, Eric W. Biederman, David Miller, shemminger, jpirko,
	xiaosuo, netdev, kaber, fubar, eric.dumazet, andy, jesse

2011/5/24 Nicolas de Pesloüan <nicolas.2p.debian@gmail.com>:
>> If it turns out the NIC is not stripping VLAN tags for whatever reason,
>> we might be able to optimize things so that it never does the HW emulation
>> so that it never has to un-do it later.
> It might be very tricky to avoid any do-undo-redo situation. I will latter
> try and describe all the possible situations: with/without hw-accel,
> with/without a protocol handler registered on the parent interface,
> with/without a child interface build on top of a particular parent interface
> and with the corresponding VLAN ID...

Hardware tag stripping could be disabled whenever AF_PACKET listener
is present on the base interface. It should be easy now with the new
features handling.

BTW, what's the performance gain from hw tag stripping? Besides saving
12-byte memmove() in one cacheline?

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  5:19                                           ` David Miller
@ 2011-05-24  7:56                                             ` Eric W. Biederman
  2011-05-24 15:44                                             ` Ben Greear
  1 sibling, 0 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-05-24  7:56 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

David Miller <davem@davemloft.net> writes:

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 23 May 2011 15:05:54 -0700
>
>> 3) What do we do with pf_packet and vlan hardware acceleration when
>>    dumping not the vlan interface but the interface below the vlan
>>    interface?
>> 
>>    Do we provide an option to keep the vlan header?  Should that option
>>    be on by default?
>> 
>
> The vlan_tci in the V2 pf_packet auxdata was intended for this
> purpose.
>
> So no matter what variant of behavior is occurring, apps can properly
> reconstitute the VLAN header if they inspect the vlan_tci in the
> auxdata.

It sucks a little bit to deal with that but that is fair.

> The only thing that seems to be missing is an indication that a VLAN
> tag was present at all, ie. vlan_tx_tag_present(), in this manner an
> application could then differentiate between no VLAN header and a VLAN
> tag of zero.

Good point.

I had seen that we were putting the data there but I missed the fact
that we deleted the indicator.  That makes packets not destined for a
vlan but that just have priority bits set hard to detect.  Especially
if the priority bits are 0.

Would it cause many problems if we added used tp_status to hold a flag
indicating the presence of a vlan header?

We also have an issue that the socket filter doesn't have access to any
of the vlan information at present.

Eric


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  7:11                                                 ` Nicolas de Pesloüan
  2011-05-24  7:44                                                   ` Michał Mirosław
@ 2011-05-24 15:17                                                   ` Ben Greear
  1 sibling, 0 replies; 92+ messages in thread
From: Ben Greear @ 2011-05-24 15:17 UTC (permalink / raw)
  To: Nicolas de Pesloüan
  Cc: Eric W. Biederman, David Miller, shemminger, jpirko, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On 05/24/2011 12:11 AM, Nicolas de Pesloüan wrote:
> Le 24/05/2011 06:20, Ben Greear a écrit :
> <snip>
>
>> Then I think we should put it back with pf_packet logic. Possibly with
>> a per-socket option to disable this and send it as only aux data if that
>> is more efficient.
>
> Why would we need a per-socket option for that?

I'd want the tag available at all times, but if it is a performance win to
put the tag in 'aux data', then a socket could opt for that method.  Otherwise,
the tag should always be inline in the packet.

>> If it turns out the NIC is not stripping VLAN tags for whatever reason,
>> we might be able to optimize things so that it never does the HW emulation
>> so that it never has to un-do it later.
>
> It might be very tricky to avoid any do-undo-redo situation. I will latter try and describe all the possible situations: with/without hw-accel, with/without a
> protocol handler registered on the parent interface, with/without a child interface build on top of a particular parent interface and with the corresponding
> VLAN ID...

You could optimize for some common cases, but leave fix-up code in place to add/remove the
tag properly before handing the packet to user-space, filters, etc.

Thanks,
Ben

>
> Nicolas.


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-24  5:19                                           ` David Miller
  2011-05-24  7:56                                             ` Eric W. Biederman
@ 2011-05-24 15:44                                             ` Ben Greear
  1 sibling, 0 replies; 92+ messages in thread
From: Ben Greear @ 2011-05-24 15:44 UTC (permalink / raw)
  To: David Miller
  Cc: ebiederm, shemminger, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

On 05/23/2011 10:19 PM, David Miller wrote:
> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 23 May 2011 15:05:54 -0700
>
>> 3) What do we do with pf_packet and vlan hardware acceleration when
>>     dumping not the vlan interface but the interface below the vlan
>>     interface?
>>
>>     Do we provide an option to keep the vlan header?  Should that option
>>     be on by default?
>>
>
> The vlan_tci in the V2 pf_packet auxdata was intended for this
> purpose.
>
> So no matter what variant of behavior is occurring, apps can properly
> reconstitute the VLAN header if they inspect the vlan_tci in the
> auxdata.

When using pf-packet on eth0, with no VLAN devices existing, would
you still be putting the VLAN tags in auxdata, or would the tags be
inline in the skb?

> The only thing that seems to be missing is an indication that a VLAN
> tag was present at all, ie. vlan_tx_tag_present(), in this manner an
> application could then differentiate between no VLAN header and a VLAN
> tag of zero.

For nested VLANs, the outside VLAN data is in the auxdata, and the rest
is inline in the packet?

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check.
  2011-05-24  7:38                                                 ` Eric W. Biederman
@ 2011-06-02  3:59                                                   ` David Miller
  2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
  0 siblings, 1 reply; 92+ messages in thread
From: David Miller @ 2011-06-02  3:59 UTC (permalink / raw)
  To: ebiederm
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Tue, 24 May 2011 00:38:34 -0700

> Would you be happier if the pkt_type check was moved earlier in the
> function like say:

Yes, I like this best because it's fully consistent and it fixes
the bug.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02  3:59                                                   ` David Miller
@ 2011-06-02 13:03                                                     ` Eric W. Biederman
  2011-06-02 13:15                                                       ` Jiri Pirko
                                                                         ` (3 more replies)
  0 siblings, 4 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-06-02 13:03 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, greearb, nicolas.2p.debian, jpirko, xiaosuo, netdev,
	kaber, fubar, eric.dumazet, andy, jesse


Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
but rather in vlan_do_receive.  Otherwise the vlan header
will not be properly put on the packet in the case of
vlan header accelleration.

As we remove the check from vlan_check_reorder_header
rename it vlan_reorder_header to keep the naming clean.

Fix up the skb->pkt_type early so we don't look at the packet
after adding the vlan tag, which guarantees we don't goof
and look at the wrong field.

Use a simple if statement instead of a complicated switch
statement to decided that we need to increment rx_stats
for a multicast packet.

Hopefully at somepoint we will just declare the case where
VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
the code.  Until then this keeps it working correctly.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/if_vlan.h |   25 ++++++++++++++++++++--
 net/8021q/vlan_core.c   |   50 ++++++++++++++++++++++------------------------
 2 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 290bd8a..821f0e3 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -220,7 +220,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
 }
 
 /**
- * __vlan_put_tag - regular VLAN tag inserting
+ * vlan_insert_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_tci: VLAN TCI to insert
  *
@@ -229,8 +229,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
  *
  * Following the skb_unshare() example, in case of error, the calling function
  * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -250,8 +252,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
 
-	skb->protocol = htons(ETH_P_8021Q);
+	return skb;
+}
 
+/**
+ * __vlan_put_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ */
+static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+{
+	skb = vlan_insert_tag(skb, vlan_tci);
+	if (skb)
+		skb->protocol = htons(ETH_P_8021Q);
 	return skb;
 }
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc..735c818 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -23,6 +23,20 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		return false;
 
 	skb->dev = vlan_dev;
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly. */
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					vlan_dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+	}
+
+	if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		if (!skb)
+			return false;
+	}
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
@@ -31,22 +45,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
 	rx_stats->rx_bytes += skb->len;
-
-	switch (skb->pkt_type) {
-	case PACKET_BROADCAST:
-		break;
-	case PACKET_MULTICAST:
+	if (skb->pkt_type == PACKET_MULTICAST)
 		rx_stats->rx_multicast++;
-		break;
-	case PACKET_OTHERHOST:
-		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the
-		 * underlying device, and still route correctly. */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					vlan_dev->dev_addr))
-			skb->pkt_type = PACKET_HOST;
-		break;
-	}
 	u64_stats_update_end(&rx_stats->syncp);
 
 	return true;
@@ -89,17 +89,15 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_frags);
 
-static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		skb = NULL;
+	if (skb) {
+		/* Lifted from Gleb's VLAN code... */
+		memmove(skb->data - ETH_HLEN,
+			skb->data - VLAN_ETH_HLEN, 12);
+		skb->mac_header += VLAN_HLEN;
 	}
 	return skb;
 }
@@ -161,7 +159,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
+	skb = vlan_reorder_header(skb);
 	if (unlikely(!skb))
 		goto err_free;
 
-- 
1.7.5.1.217.g4e3aa


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
@ 2011-06-02 13:15                                                       ` Jiri Pirko
  2011-06-02 14:54                                                       ` Changli Gao
                                                                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-06-02 13:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Thu, Jun 02, 2011 at 03:03:53PM CEST, ebiederm@xmission.com wrote:
>
>Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
>but rather in vlan_do_receive.  Otherwise the vlan header
>will not be properly put on the packet in the case of
>vlan header accelleration.
>
>As we remove the check from vlan_check_reorder_header
>rename it vlan_reorder_header to keep the naming clean.
>
>Fix up the skb->pkt_type early so we don't look at the packet
>after adding the vlan tag, which guarantees we don't goof
>and look at the wrong field.
>
>Use a simple if statement instead of a complicated switch
>statement to decided that we need to increment rx_stats
>for a multicast packet.
>
>Hopefully at somepoint we will just declare the case where
>VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
>the code.  Until then this keeps it working correctly.
>
>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Looking good to me. Thanks Eric.

Reviewed-by: Jiri Pirko <jpirko@redhat.com>


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
  2011-06-02 13:15                                                       ` Jiri Pirko
@ 2011-06-02 14:54                                                       ` Changli Gao
  2011-06-02 15:26                                                         ` Eric W. Biederman
  2011-06-03  3:34                                                       ` padmanabh ratnakar
  2011-06-08 16:28                                                       ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Jiri Pirko
  3 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-02 14:54 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, jpirko,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On Thu, Jun 2, 2011 at 9:03 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
> +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
>  {
> -       if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
> -               if (skb_cow(skb, skb_headroom(skb)) < 0)
> -                       skb = NULL;
> -               if (skb) {
> -                       /* Lifted from Gleb's VLAN code... */
> -                       memmove(skb->data - ETH_HLEN,
> -                               skb->data - VLAN_ETH_HLEN, 12);
> -                       skb->mac_header += VLAN_HLEN;
> -               }
> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
> +               skb = NULL;
> +       if (skb) {

I think an else branch maybe more readable here.

> +               /* Lifted from Gleb's VLAN code... */
> +               memmove(skb->data - ETH_HLEN,
> +                       skb->data - VLAN_ETH_HLEN, 12);
> +               skb->mac_header += VLAN_HLEN;

skb->mac_len should be adjusted too.

>        }
>        return skb;
>  }




-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 14:54                                                       ` Changli Gao
@ 2011-06-02 15:26                                                         ` Eric W. Biederman
  2011-06-02 23:18                                                           ` Changli Gao
  2011-06-06 14:48                                                           ` Jiri Pirko
  0 siblings, 2 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-06-02 15:26 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, jpirko,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Changli Gao <xiaosuo@gmail.com> writes:

> On Thu, Jun 2, 2011 at 9:03 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>>
>> -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
>> +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
>>  {
>> -       if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
>> -               if (skb_cow(skb, skb_headroom(skb)) < 0)
>> -                       skb = NULL;
>> -               if (skb) {
>> -                       /* Lifted from Gleb's VLAN code... */
>> -                       memmove(skb->data - ETH_HLEN,
>> -                               skb->data - VLAN_ETH_HLEN, 12);
>> -                       skb->mac_header += VLAN_HLEN;
>> -               }
>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>> +               skb = NULL;
>> +       if (skb) {
>
> I think an else branch maybe more readable here.

Probably most readable would be simply returning NULL immediately on
error.  In this patch I just removed the if statement, and I would like
to avoid mixing different bug fixes in the same patch if possible.

>> +               /* Lifted from Gleb's VLAN code... */
>> +               memmove(skb->data - ETH_HLEN,
>> +                       skb->data - VLAN_ETH_HLEN, 12);
>> +               skb->mac_header += VLAN_HLEN;
>
> skb->mac_len should be adjusted too.

Given how vlan_untag is called at the moment it does appear
that the skb->mac_len = skb->network_header - skb->mac_header
in __netif_receive_skb that used to handle this for is no longer
doing this for us.

My feel is that either we need to do all of the header resets and the
vlan_untagging together.  So we either need this all together before or
after the another_round label:

So the proper fix is probably something like this.

diff --git a/net/core/dev.c b/net/core/dev.c
index bcb05cb..8fe50d4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3102,9 +3102,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 		skb->skb_iif = skb->dev->ifindex;
 	orig_dev = skb->dev;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 
 	pt_prev = NULL;
 
@@ -3119,6 +3116,9 @@ another_round:
 		if (unlikely(!skb))
 			goto out;
 	}
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 15:26                                                         ` Eric W. Biederman
@ 2011-06-02 23:18                                                           ` Changli Gao
  2011-06-06 14:48                                                           ` Jiri Pirko
  1 sibling, 0 replies; 92+ messages in thread
From: Changli Gao @ 2011-06-02 23:18 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, jpirko,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

On Thu, Jun 2, 2011 at 11:26 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Changli Gao <xiaosuo@gmail.com> writes:
>
>> On Thu, Jun 2, 2011 at 9:03 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>>>
>>> -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
>>> +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
>>>  {
>>> -       if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
>>> -               if (skb_cow(skb, skb_headroom(skb)) < 0)
>>> -                       skb = NULL;
>>> -               if (skb) {
>>> -                       /* Lifted from Gleb's VLAN code... */
>>> -                       memmove(skb->data - ETH_HLEN,
>>> -                               skb->data - VLAN_ETH_HLEN, 12);
>>> -                       skb->mac_header += VLAN_HLEN;
>>> -               }
>>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>>> +               skb = NULL;
>>> +       if (skb) {
>>
>> I think an else branch maybe more readable here.
>
> Probably most readable would be simply returning NULL immediately on
> error.  In this patch I just removed the if statement, and I would like
> to avoid mixing different bug fixes in the same patch if possible.
>

OK. It is minor.

>>> +               /* Lifted from Gleb's VLAN code... */
>>> +               memmove(skb->data - ETH_HLEN,
>>> +                       skb->data - VLAN_ETH_HLEN, 12);
>>> +               skb->mac_header += VLAN_HLEN;
>>
>> skb->mac_len should be adjusted too.
>
> Given how vlan_untag is called at the moment it does appear
> that the skb->mac_len = skb->network_header - skb->mac_header
> in __netif_receive_skb that used to handle this for is no longer
> doing this for us.
>
> My feel is that either we need to do all of the header resets and the
> vlan_untagging together.  So we either need this all together before or
> after the another_round label:
>
> So the proper fix is probably something like this.
>

OK, it is right. Thanks.


-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
  2011-06-02 13:15                                                       ` Jiri Pirko
  2011-06-02 14:54                                                       ` Changli Gao
@ 2011-06-03  3:34                                                       ` padmanabh ratnakar
  2011-06-03  3:59                                                         ` Changli Gao
  2011-06-08 16:28                                                       ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Jiri Pirko
  3 siblings, 1 reply; 92+ messages in thread
From: padmanabh ratnakar @ 2011-06-03  3:34 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, jpirko,
	xiaosuo, netdev, kaber, fubar, eric.dumazet, andy, jesse

Doesnt __vlan_put_tag()/vlan_insert_tag() depend on skb->data pointing
to ethernet header.
Is'nt the skb->data pointing past ethernet header in netif_receive_skb().
Am I missing anything?
Regards,
Padmanabh


On Thu, Jun 2, 2011 at 6:33 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
> but rather in vlan_do_receive.  Otherwise the vlan header
> will not be properly put on the packet in the case of
> vlan header accelleration.
>
> As we remove the check from vlan_check_reorder_header
> rename it vlan_reorder_header to keep the naming clean.
>
> Fix up the skb->pkt_type early so we don't look at the packet
> after adding the vlan tag, which guarantees we don't goof
> and look at the wrong field.
>
> Use a simple if statement instead of a complicated switch
> statement to decided that we need to increment rx_stats
> for a multicast packet.
>
> Hopefully at somepoint we will just declare the case where
> VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
> the code.  Until then this keeps it working correctly.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  include/linux/if_vlan.h |   25 ++++++++++++++++++++--
>  net/8021q/vlan_core.c   |   50 ++++++++++++++++++++++------------------------
>  2 files changed, 46 insertions(+), 29 deletions(-)
>
> diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
> index 290bd8a..821f0e3 100644
> --- a/include/linux/if_vlan.h
> +++ b/include/linux/if_vlan.h
> @@ -220,7 +220,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
>  }
>
>  /**
> - * __vlan_put_tag - regular VLAN tag inserting
> + * vlan_insert_tag - regular VLAN tag inserting
>  * @skb: skbuff to tag
>  * @vlan_tci: VLAN TCI to insert
>  *
> @@ -229,8 +229,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
>  *
>  * Following the skb_unshare() example, in case of error, the calling function
>  * doesn't have to worry about freeing the original skb.
> + *
> + * Does not change skb->protocol so this function can be used during receive.
>  */
> -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
> +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
>  {
>        struct vlan_ethhdr *veth;
>
> @@ -250,8 +252,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
>        /* now, the TCI */
>        veth->h_vlan_TCI = htons(vlan_tci);
>
> -       skb->protocol = htons(ETH_P_8021Q);
> +       return skb;
> +}
>
> +/**
> + * __vlan_put_tag - regular VLAN tag inserting
> + * @skb: skbuff to tag
> + * @vlan_tci: VLAN TCI to insert
> + *
> + * Inserts the VLAN tag into @skb as part of the payload
> + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
> + *
> + * Following the skb_unshare() example, in case of error, the calling function
> + * doesn't have to worry about freeing the original skb.
> + */
> +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
> +{
> +       skb = vlan_insert_tag(skb, vlan_tci);
> +       if (skb)
> +               skb->protocol = htons(ETH_P_8021Q);
>        return skb;
>  }
>
> diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
> index 41495dc..735c818 100644
> --- a/net/8021q/vlan_core.c
> +++ b/net/8021q/vlan_core.c
> @@ -23,6 +23,20 @@ bool vlan_do_receive(struct sk_buff **skbp)
>                return false;
>
>        skb->dev = vlan_dev;
> +       if (skb->pkt_type == PACKET_OTHERHOST) {
> +               /* Our lower layer thinks this is not local, let's make sure.
> +                * This allows the VLAN to have a different MAC than the
> +                * underlying device, and still route correctly. */
> +               if (!compare_ether_addr(eth_hdr(skb)->h_dest,
> +                                       vlan_dev->dev_addr))
> +                       skb->pkt_type = PACKET_HOST;
> +       }
> +
> +       if (unlikely(!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR))) {
> +               skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
> +               if (!skb)
> +                       return false;
> +       }
>        skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
>        skb->vlan_tci = 0;
>
> @@ -31,22 +45,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
>        u64_stats_update_begin(&rx_stats->syncp);
>        rx_stats->rx_packets++;
>        rx_stats->rx_bytes += skb->len;
> -
> -       switch (skb->pkt_type) {
> -       case PACKET_BROADCAST:
> -               break;
> -       case PACKET_MULTICAST:
> +       if (skb->pkt_type == PACKET_MULTICAST)
>                rx_stats->rx_multicast++;
> -               break;
> -       case PACKET_OTHERHOST:
> -               /* Our lower layer thinks this is not local, let's make sure.
> -                * This allows the VLAN to have a different MAC than the
> -                * underlying device, and still route correctly. */
> -               if (!compare_ether_addr(eth_hdr(skb)->h_dest,
> -                                       vlan_dev->dev_addr))
> -                       skb->pkt_type = PACKET_HOST;
> -               break;
> -       }
>        u64_stats_update_end(&rx_stats->syncp);
>
>        return true;
> @@ -89,17 +89,15 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
>  }
>  EXPORT_SYMBOL(vlan_gro_frags);
>
> -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
> +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
>  {
> -       if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
> -               if (skb_cow(skb, skb_headroom(skb)) < 0)
> -                       skb = NULL;
> -               if (skb) {
> -                       /* Lifted from Gleb's VLAN code... */
> -                       memmove(skb->data - ETH_HLEN,
> -                               skb->data - VLAN_ETH_HLEN, 12);
> -                       skb->mac_header += VLAN_HLEN;
> -               }
> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
> +               skb = NULL;
> +       if (skb) {
> +               /* Lifted from Gleb's VLAN code... */
> +               memmove(skb->data - ETH_HLEN,
> +                       skb->data - VLAN_ETH_HLEN, 12);
> +               skb->mac_header += VLAN_HLEN;
>        }
>        return skb;
>  }
> @@ -161,7 +159,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
>        skb_pull_rcsum(skb, VLAN_HLEN);
>        vlan_set_encap_proto(skb, vhdr);
>
> -       skb = vlan_check_reorder_header(skb);
> +       skb = vlan_reorder_header(skb);
>        if (unlikely(!skb))
>                goto err_free;
>
> --
> 1.7.5.1.217.g4e3aa
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-03  3:34                                                       ` padmanabh ratnakar
@ 2011-06-03  3:59                                                         ` Changli Gao
  2011-06-05 21:14                                                           ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-03  3:59 UTC (permalink / raw)
  To: padmanabh ratnakar
  Cc: Eric W. Biederman, David Miller, shemminger, greearb,
	nicolas.2p.debian, jpirko, netdev, kaber, fubar, eric.dumazet,
	andy, jesse

On Fri, Jun 3, 2011 at 11:34 AM, padmanabh ratnakar
<pratnakarlx@gmail.com> wrote:
> Doesnt __vlan_put_tag()/vlan_insert_tag() depend on skb->data pointing
> to ethernet header.
> Is'nt the skb->data pointing past ethernet header in netif_receive_skb().
> Am I missing anything?

Yes, you are right. skb->data should be adjusted before feeding to
vlan_insert_tag(), or we make vlan_insert_tag() rely on
skb->mac_header instead.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-03  3:59                                                         ` Changli Gao
@ 2011-06-05 21:14                                                           ` David Miller
  2011-06-10  8:35                                                             ` [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: David Miller @ 2011-06-05 21:14 UTC (permalink / raw)
  To: xiaosuo
  Cc: pratnakarlx, ebiederm, shemminger, greearb, nicolas.2p.debian,
	jpirko, netdev, kaber, fubar, eric.dumazet, andy, jesse

From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 3 Jun 2011 11:59:26 +0800

> On Fri, Jun 3, 2011 at 11:34 AM, padmanabh ratnakar
> <pratnakarlx@gmail.com> wrote:
>> Doesnt __vlan_put_tag()/vlan_insert_tag() depend on skb->data pointing
>> to ethernet header.
>> Is'nt the skb->data pointing past ethernet header in netif_receive_skb().
>> Am I missing anything?
> 
> Yes, you are right. skb->data should be adjusted before feeding to
> vlan_insert_tag(), or we make vlan_insert_tag() rely on
> skb->mac_header instead.

I told you guys this is not simple stuff :-)

Even here we have 3 or 4 people who study this code all the time, and
are still having trouble sorting out all of these details.


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 15:26                                                         ` Eric W. Biederman
  2011-06-02 23:18                                                           ` Changli Gao
@ 2011-06-06 14:48                                                           ` Jiri Pirko
  1 sibling, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-06-06 14:48 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Changli Gao, David Miller, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Thu, Jun 02, 2011 at 05:26:51PM CEST, ebiederm@xmission.com wrote:
>Changli Gao <xiaosuo@gmail.com> writes:
>
>> On Thu, Jun 2, 2011 at 9:03 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>>>
>>> -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
>>> +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
>>>  {
>>> -       if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
>>> -               if (skb_cow(skb, skb_headroom(skb)) < 0)
>>> -                       skb = NULL;
>>> -               if (skb) {
>>> -                       /* Lifted from Gleb's VLAN code... */
>>> -                       memmove(skb->data - ETH_HLEN,
>>> -                               skb->data - VLAN_ETH_HLEN, 12);
>>> -                       skb->mac_header += VLAN_HLEN;
>>> -               }
>>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>>> +               skb = NULL;
>>> +       if (skb) {
>>
>> I think an else branch maybe more readable here.
>
>Probably most readable would be simply returning NULL immediately on
>error.  In this patch I just removed the if statement, and I would like
>to avoid mixing different bug fixes in the same patch if possible.
>
>>> +               /* Lifted from Gleb's VLAN code... */
>>> +               memmove(skb->data - ETH_HLEN,
>>> +                       skb->data - VLAN_ETH_HLEN, 12);
>>> +               skb->mac_header += VLAN_HLEN;
>>
>> skb->mac_len should be adjusted too.
>
>Given how vlan_untag is called at the moment it does appear
>that the skb->mac_len = skb->network_header - skb->mac_header
>in __netif_receive_skb that used to handle this for is no longer
>doing this for us.
>
>My feel is that either we need to do all of the header resets and the
>vlan_untagging together.  So we either need this all together before or
>after the another_round label:
>
>So the proper fix is probably something like this.
>
>diff --git a/net/core/dev.c b/net/core/dev.c
>index bcb05cb..8fe50d4 100644
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -3102,9 +3102,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
> 		skb->skb_iif = skb->dev->ifindex;
> 	orig_dev = skb->dev;
> 
>-	skb_reset_network_header(skb);
>-	skb_reset_transport_header(skb);
>-	skb->mac_len = skb->network_header - skb->mac_header;
> 
> 	pt_prev = NULL;
> 
>@@ -3119,6 +3116,9 @@ another_round:
> 		if (unlikely(!skb))
> 			goto out;
> 	}
>+	skb_reset_network_header(skb);
>+	skb_reset_transport_header(skb);
>+	skb->mac_len = skb->network_header - skb->mac_header;
> 
> #ifdef CONFIG_NET_CLS_ACT
> 	if (skb->tc_verd & TC_NCLS) {


This looks good to me. This does not need to be done before vlan_untag.


^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
                                                                         ` (2 preceding siblings ...)
  2011-06-03  3:34                                                       ` padmanabh ratnakar
@ 2011-06-08 16:28                                                       ` Jiri Pirko
  2011-06-08 23:08                                                         ` Changli Gao
  3 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-08 16:28 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, shemminger, greearb, nicolas.2p.debian, xiaosuo,
	netdev, kaber, fubar, eric.dumazet, andy, jesse

Thu, Jun 02, 2011 at 03:03:53PM CEST, ebiederm@xmission.com wrote:
>
>Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
>but rather in vlan_do_receive.  Otherwise the vlan header
>will not be properly put on the packet in the case of
>vlan header accelleration.
>
>As we remove the check from vlan_check_reorder_header
>rename it vlan_reorder_header to keep the naming clean.
>
>Fix up the skb->pkt_type early so we don't look at the packet
>after adding the vlan tag, which guarantees we don't goof
>and look at the wrong field.
>
>Use a simple if statement instead of a complicated switch
>statement to decided that we need to increment rx_stats
>for a multicast packet.
>
>Hopefully at somepoint we will just declare the case where
>VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
>the code.  Until then this keeps it working correctly.

Why we can't remove this right away?

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-08 16:28                                                       ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Jiri Pirko
@ 2011-06-08 23:08                                                         ` Changli Gao
  2011-06-09  6:01                                                           ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-08 23:08 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Eric W. Biederman, David Miller, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

On Thu, Jun 9, 2011 at 12:28 AM, Jiri Pirko <jpirko@redhat.com> wrote:
>
> Why we can't remove this right away?
>

I think the reason is this patch is a bug fix for the stable kernel,
so we should not introduce new features or remove old features.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2
  2011-06-08 23:08                                                         ` Changli Gao
@ 2011-06-09  6:01                                                           ` Jiri Pirko
  0 siblings, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-06-09  6:01 UTC (permalink / raw)
  To: Changli Gao
  Cc: Eric W. Biederman, David Miller, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Thu, Jun 09, 2011 at 01:08:13AM CEST, xiaosuo@gmail.com wrote:
>On Thu, Jun 9, 2011 at 12:28 AM, Jiri Pirko <jpirko@redhat.com> wrote:
>>
>> Why we can't remove this right away?
>>
>
>I think the reason is this patch is a bug fix for the stable kernel,
>so we should not introduce new features or remove old features.

But for net-next if should be ok, right?

What's the reason for possibility of removing the reorder flag? Why
would user want do it?

Thanks

Jirka
>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set
  2011-05-22 19:42                         ` [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set Eric W. Biederman
@ 2011-06-09 10:59                           ` Jiri Pirko
  2011-06-12  6:17                             ` Eric W. Biederman
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-09 10:59 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Sun, May 22, 2011 at 09:42:20PM CEST, ebiederm@xmission.com wrote:
>
>Now that we no longer support clearing VLAN_FLAG_REORDER_HDR remove the
>code that was needed to cope with the case when it was cleared.
>
>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>---
> net/8021q/vlan_dev.c |   45 +++++----------------------------------------
> 1 files changed, 5 insertions(+), 40 deletions(-)
>
>diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
>index 20629fe..2b3ca1e 100644
>--- a/net/8021q/vlan_dev.c
>+++ b/net/8021q/vlan_dev.c
>@@ -96,63 +96,28 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
> 				const void *daddr, const void *saddr,
> 				unsigned int len)
> {
>-	struct vlan_hdr *vhdr;
>-	unsigned int vhdrlen = 0;
>-	u16 vlan_tci = 0;
> 	int rc;
> 
>-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
>-		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
>-
>-		vlan_tci = vlan_dev_info(dev)->vlan_id;
>-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
>-		vhdr->h_vlan_TCI = htons(vlan_tci);
>-
>-		/*
>-		 *  Set the protocol type. For a packet of type ETH_P_802_3/2 we
>-		 *  put the length in here instead.
>-		 */
>-		if (type != ETH_P_802_3 && type != ETH_P_802_2)
>-			vhdr->h_vlan_encapsulated_proto = htons(type);
>-		else
>-			vhdr->h_vlan_encapsulated_proto = htons(len);
>-
>-		skb->protocol = htons(ETH_P_8021Q);
>-		type = ETH_P_8021Q;
>-		vhdrlen = VLAN_HLEN;
>-	}
>-
> 	/* Before delegating work to the lower layer, enter our MAC-address */
> 	if (saddr == NULL)
> 		saddr = dev->dev_addr;
> 
> 	/* Now make the underlying real hard header */
> 	dev = vlan_dev_info(dev)->real_dev;
>-	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
>-	if (rc > 0)
>-		rc += vhdrlen;
>+	rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
> 	return rc;
> }
> 
> static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
> 					    struct net_device *dev)
> {
>-	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
> 	unsigned int len;
>+	u16 vlan_tci;
> 	int ret;
> 
>-	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
>-	 *
>-	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
>-	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
>-	 */
>-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
	    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	    		this should stay here.

>-	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
>-		u16 vlan_tci;
>-		vlan_tci = vlan_dev_info(dev)->vlan_id;
>-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
>-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
>-	}
>+	vlan_tci = vlan_dev_info(dev)->vlan_id;
>+	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
>+	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
> 
> 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
> 	len = skb->len;
>-- 
>1.7.5.1.217.g4e3aa
>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR
  2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
  2011-05-22 19:40                       ` [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag Eric W. Biederman
  2011-05-22 21:04                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Ben Greear
@ 2011-06-09 11:00                       ` Jiri Pirko
  2 siblings, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-06-09 11:00 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

How about to drop this patch and silently ignore the flag? That would
feel the same for user who is setting it.

Sun, May 22, 2011 at 09:39:18PM CEST, ebiederm@xmission.com wrote:
>
>Simplify the vlan handling code by not supporing clearing of
>VLAN_FLAG_REORDER_HDR.  Which means we always make the vlan handling
>code strip the vlan header from the packets, and always insert the vlan
>header when transmitting packets.
>
>Not stripping the vlan header has alwasy been broken in combination with
>vlan hardware accelleration.  Now that we are making everything look
>like accelerated vlan handling not stripping the vlan header is always
>broken.
>
>I don't think anyone actually cares so simply stop supporting the broken
>case.
>
>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>---
> net/8021q/vlan_dev.c     |    3 +--
> net/8021q/vlan_netlink.c |    4 +---
> 2 files changed, 2 insertions(+), 5 deletions(-)
>
>diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
>index f247f5b..20629fe 100644
>--- a/net/8021q/vlan_dev.c
>+++ b/net/8021q/vlan_dev.c
>@@ -242,8 +242,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
> 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
> 	u32 old_flags = vlan->flags;
> 
>-	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
>-		     VLAN_FLAG_LOOSE_BINDING))
>+	if (mask & ~(VLAN_FLAG_GVRP | VLAN_FLAG_LOOSE_BINDING))
> 		return -EINVAL;
> 
> 	vlan->flags = (old_flags & ~mask) | (flags & mask);
>diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
>index be9a5c1..a638a4c 100644
>--- a/net/8021q/vlan_netlink.c
>+++ b/net/8021q/vlan_netlink.c
>@@ -59,9 +59,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
> 	}
> 	if (data[IFLA_VLAN_FLAGS]) {
> 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
>-		if ((flags->flags & flags->mask) &
>-		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
>-		      VLAN_FLAG_LOOSE_BINDING))
>+		if (flags->mask & ~(VLAN_FLAG_GVRP | VLAN_FLAG_LOOSE_BINDING))
> 			return -EINVAL;
> 	}
> 
>-- 
>1.7.5.1.217.g4e3aa
>

^ permalink raw reply	[flat|nested] 92+ messages in thread

* [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-05 21:14                                                           ` David Miller
@ 2011-06-10  8:35                                                             ` Jiri Pirko
  2011-06-10  9:26                                                               ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-10  8:35 UTC (permalink / raw)
  To: David Miller
  Cc: xiaosuo, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Please review. I'm not sure if there wouldn't be necessary to control
for rx accelerated skbs. Smoke tested.

Subject: [patch net-2.6 v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check

Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
but rather in vlan_do_receive.  Otherwise the vlan header
will not be properly put on the packet in the case of
vlan header accelleration.

As we remove the check from vlan_check_reorder_header
rename it vlan_reorder_header to keep the naming clean.

Use a simple if statement instead of a complicated switch
statement to decided that we need to increment rx_stats
for a multicast packet.

Hopefully at somepoint we will just declare the case where
VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
the code.  Until then this keeps it working correctly.

Use vlan_unreorder_header to revert actions previously done by
vlan_untag->vlan_reorder_header in case VLAN_FLAG_REORDER_HDR is not set

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 net/8021q/vlan_core.c |   75 ++++++++++++++++++++++++++++--------------------
 1 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc..d71cc81 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,6 +4,33 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
+static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
+{
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		return NULL;
+	/* Lifted from Gleb's VLAN code... */
+	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 12);
+	skb->mac_header += VLAN_HLEN;
+	return skb;
+}
+
+/* Should be used only to revert vlan_reorder_header action */
+static struct sk_buff *vlan_unreorder_header(struct sk_buff *skb)
+{
+	unsigned char *mac_header;
+	struct vlan_ethhdr *veth;
+
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		return NULL;
+	skb->mac_header -= VLAN_HLEN;
+	mac_header = skb_mac_header(skb);
+	memmove(mac_header, mac_header + VLAN_HLEN, 12);
+	veth = (struct vlan_ethhdr *) mac_header;
+	veth->h_vlan_proto = htons(ETH_P_8021Q);
+	veth->h_vlan_TCI = htons(skb->vlan_tci);
+	return skb;
+}
+
 bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
@@ -23,6 +50,21 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		return false;
 
 	skb->dev = vlan_dev;
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly. */
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					vlan_dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+	}
+
+	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+		skb = *skbp = vlan_unreorder_header(skb);
+		if (!skb)
+			return false;
+	}
+
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
@@ -31,22 +73,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
 	rx_stats->rx_bytes += skb->len;
-
-	switch (skb->pkt_type) {
-	case PACKET_BROADCAST:
-		break;
-	case PACKET_MULTICAST:
+	if (skb->pkt_type == PACKET_MULTICAST)
 		rx_stats->rx_multicast++;
-		break;
-	case PACKET_OTHERHOST:
-		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the
-		 * underlying device, and still route correctly. */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					vlan_dev->dev_addr))
-			skb->pkt_type = PACKET_HOST;
-		break;
-	}
 	u64_stats_update_end(&rx_stats->syncp);
 
 	return true;
@@ -89,21 +117,6 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_frags);
 
-static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
-{
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
-	}
-	return skb;
-}
-
 static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
 {
 	__be16 proto;
@@ -161,7 +174,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
+	skb = vlan_reorder_header(skb);
 	if (unlikely(!skb))
 		goto err_free;
 
-- 
1.7.5.2


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10  8:35                                                             ` [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Jiri Pirko
@ 2011-06-10  9:26                                                               ` Changli Gao
  2011-06-10  9:34                                                                 ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-10  9:26 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

On Fri, Jun 10, 2011 at 4:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
> +
> +/* Should be used only to revert vlan_reorder_header action */
> +static struct sk_buff *vlan_unreorder_header(struct sk_buff *skb)
> +{
> +       unsigned char *mac_header;
> +       struct vlan_ethhdr *veth;
> +
> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
> +               return NULL;

I think we need to make sure if there is enough headroom for this
header expansion.

> +       skb->mac_header -= VLAN_HLEN;

skb->mac_len isn't adjusted. You forgot to move the headers resetting
after vlan_untag().



-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10  9:26                                                               ` Changli Gao
@ 2011-06-10  9:34                                                                 ` Jiri Pirko
  2011-06-10  9:49                                                                   ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-10  9:34 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Fri, Jun 10, 2011 at 11:26:06AM CEST, xiaosuo@gmail.com wrote:
>On Fri, Jun 10, 2011 at 4:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>> +
>> +/* Should be used only to revert vlan_reorder_header action */
>> +static struct sk_buff *vlan_unreorder_header(struct sk_buff *skb)
>> +{
>> +       unsigned char *mac_header;
>> +       struct vlan_ethhdr *veth;
>> +
>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>> +               return NULL;
>
>I think we need to make sure if there is enough headroom for this
>header expansion.

Well the header expansion was previously there so there should be place
there in all cases, or am I wrong?

>
>> +       skb->mac_header -= VLAN_HLEN;
>
>skb->mac_len isn't adjusted. You forgot to move the headers resetting
>after vlan_untag().

that is correct, I'll move that. I'll also add reset after unreorder.

Thanks Changli.

Jirka
>
>
>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10  9:34                                                                 ` Jiri Pirko
@ 2011-06-10  9:49                                                                   ` Changli Gao
  2011-06-10 10:35                                                                     ` Jiri Pirko
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-10  9:49 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

On Fri, Jun 10, 2011 at 5:34 PM, Jiri Pirko <jpirko@redhat.com> wrote:
> Fri, Jun 10, 2011 at 11:26:06AM CEST, xiaosuo@gmail.com wrote:
>>On Fri, Jun 10, 2011 at 4:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>>> +
>>> +/* Should be used only to revert vlan_reorder_header action */
>>> +static struct sk_buff *vlan_unreorder_header(struct sk_buff *skb)
>>> +{
>>> +       unsigned char *mac_header;
>>> +       struct vlan_ethhdr *veth;
>>> +
>>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>>> +               return NULL;
>>
>>I think we need to make sure if there is enough headroom for this
>>header expansion.
>
> Well the header expansion was previously there so there should be place
> there in all cases, or am I wrong?

For hw-accel-vlan-rx, is the headroom for this header expansion
reserved? I don't think so. Thanks.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10  9:49                                                                   ` Changli Gao
@ 2011-06-10 10:35                                                                     ` Jiri Pirko
  2011-06-10 11:20                                                                       ` Changli Gao
  0 siblings, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-10 10:35 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Fri, Jun 10, 2011 at 11:49:27AM CEST, xiaosuo@gmail.com wrote:
>On Fri, Jun 10, 2011 at 5:34 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>> Fri, Jun 10, 2011 at 11:26:06AM CEST, xiaosuo@gmail.com wrote:
>>>On Fri, Jun 10, 2011 at 4:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>>>> +
>>>> +/* Should be used only to revert vlan_reorder_header action */
>>>> +static struct sk_buff *vlan_unreorder_header(struct sk_buff *skb)
>>>> +{
>>>> +       unsigned char *mac_header;
>>>> +       struct vlan_ethhdr *veth;
>>>> +
>>>> +       if (skb_cow(skb, skb_headroom(skb)) < 0)
>>>> +               return NULL;
>>>
>>>I think we need to make sure if there is enough headroom for this
>>>header expansion.
>>
>> Well the header expansion was previously there so there should be place
>> there in all cases, or am I wrong?
>
>For hw-accel-vlan-rx, is the headroom for this header expansion
>reserved? I don't think so. Thanks.

But this wasn't done for hw-accel-vlan-rx previously right? So why don't
check for hw-accel-vlan-rx and don do unreorder in that case?
>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10 10:35                                                                     ` Jiri Pirko
@ 2011-06-10 11:20                                                                       ` Changli Gao
  2011-06-10 12:12                                                                         ` Jiri Pirko
  2011-06-10 16:56                                                                         ` Jiri Pirko
  0 siblings, 2 replies; 92+ messages in thread
From: Changli Gao @ 2011-06-10 11:20 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

On Fri, Jun 10, 2011 at 6:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
> Fri, Jun 10, 2011 at 11:49:27AM CEST, xiaosuo@gmail.com wrote:
>>
>>For hw-accel-vlan-rx, is the headroom for this header expansion
>>reserved? I don't think so. Thanks.
>
> But this wasn't done for hw-accel-vlan-rx previously right? So why don't
> check for hw-accel-vlan-rx and don do unreorder in that case?
>

Yes, it wasn't done before. We want to make the behaviors of
hw-accel-vlan-rx and non-hw-accel-vlan-rx are the same. Is it your
original goal of your vlan-untag patch?

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10 11:20                                                                       ` Changli Gao
@ 2011-06-10 12:12                                                                         ` Jiri Pirko
  2011-06-10 16:56                                                                         ` Jiri Pirko
  1 sibling, 0 replies; 92+ messages in thread
From: Jiri Pirko @ 2011-06-10 12:12 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

Fri, Jun 10, 2011 at 01:20:29PM CEST, xiaosuo@gmail.com wrote:
>On Fri, Jun 10, 2011 at 6:35 PM, Jiri Pirko <jpirko@redhat.com> wrote:
>> Fri, Jun 10, 2011 at 11:49:27AM CEST, xiaosuo@gmail.com wrote:
>>>
>>>For hw-accel-vlan-rx, is the headroom for this header expansion
>>>reserved? I don't think so. Thanks.
>>
>> But this wasn't done for hw-accel-vlan-rx previously right? So why don't
>> check for hw-accel-vlan-rx and don do unreorder in that case?
>>
>
>Yes, it wasn't done before. We want to make the behaviors of
>hw-accel-vlan-rx and non-hw-accel-vlan-rx are the same. Is it your
>original goal of your vlan-untag patch?

You are correct. I just thought that since disabling reordering will be
most likely removed in near future, non-hw-accel-vlan-rx reorder disable
would not be needed.

>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10 11:20                                                                       ` Changli Gao
  2011-06-10 12:12                                                                         ` Jiri Pirko
@ 2011-06-10 16:56                                                                         ` Jiri Pirko
  2011-06-11  0:05                                                                           ` Changli Gao
  1 sibling, 1 reply; 92+ messages in thread
From: Jiri Pirko @ 2011-06-10 16:56 UTC (permalink / raw)
  To: Changli Gao
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

This time heavily based on Eric's V2. mac_len is reset at appropriate
places. Also skb->data is adjusted to point to beginning of mac header
before calling vlan_insert_tag.

Please review (fingers crossed).

Subject: [patch net-2.6 v2.1] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check

Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
but rather in vlan_do_receive.  Otherwise the vlan header
will not be properly put on the packet in the case of
vlan header accelleration.

As we remove the check from vlan_check_reorder_header
rename it vlan_reorder_header to keep the naming clean.

Fix up the skb->pkt_type early so we don't look at the packet
after adding the vlan tag, which guarantees we don't goof
and look at the wrong field.

Use a simple if statement instead of a complicated switch
statement to decided that we need to increment rx_stats
for a multicast packet.

Hopefully at somepoint we will just declare the case where
VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
the code.  Until then this keeps it working correctly.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 include/linux/if_vlan.h |   25 +++++++++++++++++--
 include/linux/skbuff.h  |    5 ++++
 net/8021q/vlan_core.c   |   60 +++++++++++++++++++++++++---------------------
 net/core/dev.c          |    2 +-
 4 files changed, 61 insertions(+), 31 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index dc01681..affa273 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -225,7 +225,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
 }
 
 /**
- * __vlan_put_tag - regular VLAN tag inserting
+ * vlan_insert_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_tci: VLAN TCI to insert
  *
@@ -234,8 +234,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb,
  *
  * Following the skb_unshare() example, in case of error, the calling function
  * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
 {
 	struct vlan_ethhdr *veth;
 
@@ -255,8 +257,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 	/* now, the TCI */
 	veth->h_vlan_TCI = htons(vlan_tci);
 
-	skb->protocol = htons(ETH_P_8021Q);
+	return skb;
+}
 
+/**
+ * __vlan_put_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ */
+static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
+{
+	skb = vlan_insert_tag(skb, vlan_tci);
+	if (skb)
+		skb->protocol = htons(ETH_P_8021Q);
 	return skb;
 }
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e8b78ce..c0a4f3a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1256,6 +1256,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_mac_len(struct sk_buff *skb)
+{
+	skb->mac_len = skb->network_header - skb->mac_header;
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 41495dc..fcc6846 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		return false;
 
 	skb->dev = vlan_dev;
+	if (skb->pkt_type == PACKET_OTHERHOST) {
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly. */
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					vlan_dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+	}
+
+	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+		unsigned int offset = skb->data - skb_mac_header(skb);
+
+		/*
+		 * vlan_insert_tag expect skb->data pointing to mac header.
+		 * So change skb->data before calling it and change back to
+		 * original position later
+		 */
+		skb_push(skb, offset);
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		if (!skb)
+			return false;
+		skb_pull(skb, offset + VLAN_HLEN);
+		skb_reset_mac_len(skb);
+	}
+
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
@@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
 	rx_stats->rx_bytes += skb->len;
-
-	switch (skb->pkt_type) {
-	case PACKET_BROADCAST:
-		break;
-	case PACKET_MULTICAST:
+	if (skb->pkt_type == PACKET_MULTICAST)
 		rx_stats->rx_multicast++;
-		break;
-	case PACKET_OTHERHOST:
-		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the
-		 * underlying device, and still route correctly. */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					vlan_dev->dev_addr))
-			skb->pkt_type = PACKET_HOST;
-		break;
-	}
 	u64_stats_update_end(&rx_stats->syncp);
 
 	return true;
@@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 }
 EXPORT_SYMBOL(vlan_gro_frags);
 
-static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 {
-	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		if (skb_cow(skb, skb_headroom(skb)) < 0)
-			skb = NULL;
-		if (skb) {
-			/* Lifted from Gleb's VLAN code... */
-			memmove(skb->data - ETH_HLEN,
-				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac_header += VLAN_HLEN;
-		}
-	}
+	if (skb_cow(skb, skb_headroom(skb)) < 0)
+		return NULL;
+	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	skb->mac_header += VLAN_HLEN;
+	skb_reset_mac_len(skb);
 	return skb;
 }
 
@@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
+	skb = vlan_reorder_header(skb);
 	if (unlikely(!skb))
 		goto err_free;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a54c9f8..9c58c1e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
+	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
 
-- 
1.7.5.2


^ permalink raw reply related	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-10 16:56                                                                         ` Jiri Pirko
@ 2011-06-11  0:05                                                                           ` Changli Gao
  2011-06-11 23:16                                                                             ` David Miller
  0 siblings, 1 reply; 92+ messages in thread
From: Changli Gao @ 2011-06-11  0:05 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

On Sat, Jun 11, 2011 at 12:56 AM, Jiri Pirko <jpirko@redhat.com> wrote:
> This time heavily based on Eric's V2. mac_len is reset at appropriate
> places. Also skb->data is adjusted to point to beginning of mac header
> before calling vlan_insert_tag.
>
> Please review (fingers crossed).
>
> Subject: [patch net-2.6 v2.1] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
>
> Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
> but rather in vlan_do_receive.  Otherwise the vlan header
> will not be properly put on the packet in the case of
> vlan header accelleration.
>
> As we remove the check from vlan_check_reorder_header
> rename it vlan_reorder_header to keep the naming clean.
>
> Fix up the skb->pkt_type early so we don't look at the packet
> after adding the vlan tag, which guarantees we don't goof
> and look at the wrong field.
>
> Use a simple if statement instead of a complicated switch
> statement to decided that we need to increment rx_stats
> for a multicast packet.
>
> Hopefully at somepoint we will just declare the case where
> VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
> the code.  Until then this keeps it working correctly.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
  2011-06-11  0:05                                                                           ` Changli Gao
@ 2011-06-11 23:16                                                                             ` David Miller
  0 siblings, 0 replies; 92+ messages in thread
From: David Miller @ 2011-06-11 23:16 UTC (permalink / raw)
  To: xiaosuo
  Cc: jpirko, pratnakarlx, ebiederm, shemminger, greearb,
	nicolas.2p.debian, netdev, kaber, fubar, eric.dumazet, andy,
	jesse

From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 11 Jun 2011 08:05:37 +0800

> On Sat, Jun 11, 2011 at 12:56 AM, Jiri Pirko <jpirko@redhat.com> wrote:
>> This time heavily based on Eric's V2. mac_len is reset at appropriate
>> places. Also skb->data is adjusted to point to beginning of mac header
>> before calling vlan_insert_tag.
>>
>> Please review (fingers crossed).
>>
>> Subject: [patch net-2.6 v2.1] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check
>>
>> Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag
>> but rather in vlan_do_receive.  Otherwise the vlan header
>> will not be properly put on the packet in the case of
>> vlan header accelleration.
>>
>> As we remove the check from vlan_check_reorder_header
>> rename it vlan_reorder_header to keep the naming clean.
>>
>> Fix up the skb->pkt_type early so we don't look at the packet
>> after adding the vlan tag, which guarantees we don't goof
>> and look at the wrong field.
>>
>> Use a simple if statement instead of a complicated switch
>> statement to decided that we need to increment rx_stats
>> for a multicast packet.
>>
>> Hopefully at somepoint we will just declare the case where
>> VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove
>> the code.  Until then this keeps it working correctly.
>>
>> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> Acked-by: Changli Gao <xiaosuo@gmail.com>

Applied, great work everyone.

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set
  2011-06-09 10:59                           ` Jiri Pirko
@ 2011-06-12  6:17                             ` Eric W. Biederman
  0 siblings, 0 replies; 92+ messages in thread
From: Eric W. Biederman @ 2011-06-12  6:17 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, Nicolas de Pesloüan, Changli Gao, netdev,
	shemminger, kaber, fubar, eric.dumazet, andy, Jesse Gross

Jiri Pirko <jpirko@redhat.com> writes:

> Sun, May 22, 2011 at 09:42:20PM CEST, ebiederm@xmission.com wrote:
>>
>>Now that we no longer support clearing VLAN_FLAG_REORDER_HDR remove the
>>code that was needed to cope with the case when it was cleared.
>>
>>Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>>---
>> net/8021q/vlan_dev.c |   45 +++++----------------------------------------
>> 1 files changed, 5 insertions(+), 40 deletions(-)
>>
>>diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
>>index 20629fe..2b3ca1e 100644
>>--- a/net/8021q/vlan_dev.c
>>+++ b/net/8021q/vlan_dev.c
>>@@ -96,63 +96,28 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
>> 				const void *daddr, const void *saddr,
>> 				unsigned int len)
>> {
>>-	struct vlan_hdr *vhdr;
>>-	unsigned int vhdrlen = 0;
>>-	u16 vlan_tci = 0;
>> 	int rc;
>> 
>>-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
>>-		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
>>-
>>-		vlan_tci = vlan_dev_info(dev)->vlan_id;
>>-		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
>>-		vhdr->h_vlan_TCI = htons(vlan_tci);
>>-
>>-		/*
>>-		 *  Set the protocol type. For a packet of type ETH_P_802_3/2 we
>>-		 *  put the length in here instead.
>>-		 */
>>-		if (type != ETH_P_802_3 && type != ETH_P_802_2)
>>-			vhdr->h_vlan_encapsulated_proto = htons(type);
>>-		else
>>-			vhdr->h_vlan_encapsulated_proto = htons(len);
>>-
>>-		skb->protocol = htons(ETH_P_8021Q);
>>-		type = ETH_P_8021Q;
>>-		vhdrlen = VLAN_HLEN;
>>-	}
>>-
>> 	/* Before delegating work to the lower layer, enter our MAC-address */
>> 	if (saddr == NULL)
>> 		saddr = dev->dev_addr;
>> 
>> 	/* Now make the underlying real hard header */
>> 	dev = vlan_dev_info(dev)->real_dev;
>>-	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
>>-	if (rc > 0)
>>-		rc += vhdrlen;
>>+	rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
>> 	return rc;
>> }
>> 
>> static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
>> 					    struct net_device *dev)
>> {
>>-	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
>> 	unsigned int len;
>>+	u16 vlan_tci;
>> 	int ret;
>> 
>>-	/* Handle non-VLAN frames if they are sent to us, for example by DHCP.
>>-	 *
>>-	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
>>-	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
>>-	 */
>>-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
> 	    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 	    		this should stay here.
>

I admit this is a change in behavior from today so we need to be careful
here.

At least the comment needs to change.

When this code was written the assumption was that everything would come
in tagged and this code was a special exception for pf_packet sockets.

Now everything comes in untagged this code becomes a special exception
for pf_packet sockets of not putting on a vlan header.

To me this special exception pf_packet sockets looks like a bug, that
should have been conditionality on VLAN_FLAG_REORDER_HDR but was over
looked.

Now maybe we want to be bug compatible, or do this as a separate patch.

I would expect sending tagged packets out a vlan device would result
in double tagged packets but apparently not always.


Eric

^ permalink raw reply	[flat|nested] 92+ messages in thread

end of thread, other threads:[~2011-06-12  6:17 UTC | newest]

Thread overview: 92+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-08  5:48 [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Jiri Pirko
2011-04-12 21:16 ` David Miller
2011-05-21  1:11   ` Changli Gao
2011-05-21  7:29     ` Jiri Pirko
2011-05-21 10:43       ` Changli Gao
2011-05-21 13:17         ` Nicolas de Pesloüan
2011-05-21 17:54           ` Jesse Gross
2011-05-21 22:15             ` Stephen Hemminger
2011-05-22  2:59             ` Nicolas de Pesloüan
2011-05-22  6:29               ` Jiri Pirko
2011-05-22  6:34                 ` Eric W. Biederman
2011-05-22  8:34                   ` Nicolas de Pesloüan
2011-05-22  8:52                     ` Michał Mirosław
2011-05-22  9:10                       ` Nicolas de Pesloüan
2011-05-22  9:20                         ` Michał Mirosław
2011-05-22  9:36                           ` Jiri Pirko
2011-05-22  9:53                             ` Nicolas de Pesloüan
2011-05-22 10:04                               ` Michał Mirosław
2011-05-22 16:11                   ` Jesse Gross
2011-05-22 18:24                     ` Eric W. Biederman
2011-05-22 19:33                       ` Eric W. Biederman
2011-05-22 19:39                     ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Eric W. Biederman
2011-05-22 19:40                       ` [PATCH 2/3] vlan: Always strip the vlan header in vlan_untag Eric W. Biederman
2011-05-22 19:42                         ` [PATCH 3/3] vlan: Simplify the code now that VLAN_FLAG_REORDER_HDR is always set Eric W. Biederman
2011-06-09 10:59                           ` Jiri Pirko
2011-06-12  6:17                             ` Eric W. Biederman
2011-05-22 21:04                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Ben Greear
2011-05-22 22:38                         ` Eric W. Biederman
2011-05-23  0:38                           ` Changli Gao
2011-05-23  1:26                             ` Changli Gao
2011-05-23  1:45                               ` Eric W. Biederman
2011-05-23  2:14                                 ` Changli Gao
2011-05-23  9:41                                   ` Eric W. Biederman
2011-05-23 10:43                                     ` Jiri Pirko
2011-05-23 19:48                                       ` Nicolas de Pesloüan
2011-05-24  5:58                                         ` Jiri Pirko
2011-05-24  7:19                                           ` Nicolas de Pesloüan
2011-05-23  1:39                             ` Eric W. Biederman
2011-05-23  6:01                           ` Ben Greear
2011-05-23  9:00                             ` Eric W. Biederman
2011-05-23 16:33                               ` Ben Greear
2011-05-23 19:36                                 ` Nicolas de Pesloüan
2011-05-23 20:24                                   ` Ben Greear
2011-05-23 21:00                                     ` Stephen Hemminger
2011-05-23 21:20                                       ` David Miller
2011-05-23 22:05                                         ` Eric W. Biederman
2011-05-23 22:16                                           ` Stephen Hemminger
2011-05-23 22:48                                             ` Eric W. Biederman
2011-05-23 22:23                                           ` Ben Greear
2011-05-23 23:02                                             ` Eric W. Biederman
2011-05-24  4:20                                               ` Ben Greear
2011-05-24  7:11                                                 ` Nicolas de Pesloüan
2011-05-24  7:44                                                   ` Michał Mirosław
2011-05-24 15:17                                                   ` Ben Greear
2011-05-24  5:19                                           ` David Miller
2011-05-24  7:56                                             ` Eric W. Biederman
2011-05-24 15:44                                             ` Ben Greear
2011-05-24  0:11                                         ` [PATCH] vlan: Fix the b0rked ingress VLAN_FLAG_REORDER_HDR check Eric W. Biederman
2011-05-24  4:54                                           ` David Miller
2011-05-24  6:18                                             ` Eric W. Biederman
2011-05-24  6:24                                               ` David Miller
2011-05-24  7:38                                                 ` Eric W. Biederman
2011-06-02  3:59                                                   ` David Miller
2011-06-02 13:03                                                     ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Eric W. Biederman
2011-06-02 13:15                                                       ` Jiri Pirko
2011-06-02 14:54                                                       ` Changli Gao
2011-06-02 15:26                                                         ` Eric W. Biederman
2011-06-02 23:18                                                           ` Changli Gao
2011-06-06 14:48                                                           ` Jiri Pirko
2011-06-03  3:34                                                       ` padmanabh ratnakar
2011-06-03  3:59                                                         ` Changli Gao
2011-06-05 21:14                                                           ` David Miller
2011-06-10  8:35                                                             ` [PATCH v3] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Jiri Pirko
2011-06-10  9:26                                                               ` Changli Gao
2011-06-10  9:34                                                                 ` Jiri Pirko
2011-06-10  9:49                                                                   ` Changli Gao
2011-06-10 10:35                                                                     ` Jiri Pirko
2011-06-10 11:20                                                                       ` Changli Gao
2011-06-10 12:12                                                                         ` Jiri Pirko
2011-06-10 16:56                                                                         ` Jiri Pirko
2011-06-11  0:05                                                                           ` Changli Gao
2011-06-11 23:16                                                                             ` David Miller
2011-06-08 16:28                                                       ` [PATCH] vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check v2 Jiri Pirko
2011-06-08 23:08                                                         ` Changli Gao
2011-06-09  6:01                                                           ` Jiri Pirko
2011-06-09 11:00                       ` [PATCH 1/3] vlan: Do not support clearing VLAN_FLAG_REORDER_HDR Jiri Pirko
2011-05-22  8:38                 ` [patch net-next-2.6 v2] net: vlan: make non-hw-accel rx path similar to hw-accel Changli Gao
2011-05-22  9:37                   ` Jiri Pirko
2011-05-22 10:17                     ` Changli Gao
2011-05-22 10:26                       ` Eric W. Biederman
2011-05-22 10:40                         ` Changli Gao
2011-05-22 13:16                       ` Jiri Pirko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.