All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
@ 2013-04-09  9:57 Cong Wang
  2013-04-09 17:16 ` David Miller
  2013-04-09 18:08 ` Sridhar Samudrala
  0 siblings, 2 replies; 15+ messages in thread
From: Cong Wang @ 2013-04-09  9:57 UTC (permalink / raw)
  To: netdev; +Cc: Sridhar Samudrala, David S. Miller, Cong Wang

From: Cong Wang <amwang@redhat.com>

This reverts commit 9dcc71e1fdbb7aa10d92a3d35e8a201adc84abd0.
It apparently breaks my vxlan tests between different namespaces.

Cc: Sridhar Samudrala <sri@us.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 drivers/net/vxlan.c |   59 +++++++++++++-------------------------------------
 1 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 9a64715..62a4438 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -912,36 +912,6 @@ static int handle_offloads(struct sk_buff *skb)
 	return 0;
 }
 
-/* Bypass encapsulation if the destination is local */
-static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
-			       struct vxlan_dev *dst_vxlan)
-{
-	struct pcpu_tstats *tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
-	struct pcpu_tstats *rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
-
-	skb->pkt_type = PACKET_HOST;
-	skb->encapsulation = 0;
-	skb->dev = dst_vxlan->dev;
-	__skb_pull(skb, skb_network_offset(skb));
-
-	if (dst_vxlan->flags & VXLAN_F_LEARN)
-		vxlan_snoop(skb->dev, INADDR_LOOPBACK, eth_hdr(skb)->h_source);
-
-	u64_stats_update_begin(&tx_stats->syncp);
-	tx_stats->tx_packets++;
-	tx_stats->tx_bytes += skb->len;
-	u64_stats_update_end(&tx_stats->syncp);
-
-	if (netif_rx(skb) == NET_RX_SUCCESS) {
-		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;
-		rx_stats->rx_bytes += skb->len;
-		u64_stats_update_end(&rx_stats->syncp);
-	} else {
-		skb->dev->stats.rx_dropped++;
-	}
-}
-
 static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				  struct vxlan_rdst *rdst, bool did_rsc)
 {
@@ -952,6 +922,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	struct flowi4 fl4;
+	unsigned int pkt_len = skb->len;
 	__be32 dst;
 	__u16 src_port, dst_port;
         u32 vni;
@@ -964,8 +935,22 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 	if (!dst) {
 		if (did_rsc) {
+			__skb_pull(skb, skb_network_offset(skb));
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->pkt_type = PACKET_HOST;
+
 			/* short-circuited back to local bridge */
-			vxlan_encap_bypass(skb, vxlan, vxlan);
+			if (netif_rx(skb) == NET_RX_SUCCESS) {
+				struct pcpu_tstats *stats = this_cpu_ptr(dev->tstats);
+
+				u64_stats_update_begin(&stats->syncp);
+				stats->tx_packets++;
+				stats->tx_bytes += pkt_len;
+				u64_stats_update_end(&stats->syncp);
+			} else {
+				dev->stats.tx_errors++;
+				dev->stats.tx_aborted_errors++;
+			}
 			return NETDEV_TX_OK;
 		}
 		goto drop;
@@ -1012,18 +997,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	/* Bypass encapsulation if the destination is local */
-	if (rt->rt_flags & RTCF_LOCAL) {
-		struct vxlan_dev *dst_vxlan;
-
-		ip_rt_put(rt);
-		dst_vxlan = vxlan_find_vni(dev_net(dev), vni);
-		if (!dst_vxlan)
-			goto tx_error;
-		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-		return NETDEV_TX_OK;
-	}
-
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-09  9:57 [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local" Cong Wang
@ 2013-04-09 17:16 ` David Miller
  2013-04-09 18:08 ` Sridhar Samudrala
  1 sibling, 0 replies; 15+ messages in thread
From: David Miller @ 2013-04-09 17:16 UTC (permalink / raw)
  To: amwang; +Cc: netdev, sri

From: Cong Wang <amwang@redhat.com>
Date: Tue,  9 Apr 2013 17:57:25 +0800

> From: Cong Wang <amwang@redhat.com>
> 
> This reverts commit 9dcc71e1fdbb7aa10d92a3d35e8a201adc84abd0.
> It apparently breaks my vxlan tests between different namespaces.
> 
> Cc: Sridhar Samudrala <sri@us.ibm.com>
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <amwang@redhat.com>

How about you simply updating the network namespace attached to the
packet (or whatever piece of state isn't set properly) rather than a
complete revert of his change?

I'm not applying this until you attempt this or the original author
has an opportunity to address this.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-09  9:57 [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local" Cong Wang
  2013-04-09 17:16 ` David Miller
@ 2013-04-09 18:08 ` Sridhar Samudrala
  2013-04-10 14:29   ` Sergei Shtylyov
  2013-04-11  2:10   ` Cong Wang
  1 sibling, 2 replies; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-09 18:08 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, David S. Miller

On Tue, 2013-04-09 at 17:57 +0800, Cong Wang wrote:
> From: Cong Wang <amwang@redhat.com>
> 
> This reverts commit 9dcc71e1fdbb7aa10d92a3d35e8a201adc84abd0.
> It apparently breaks my vxlan tests between different namespaces.
> 
I haven't tried vxlan with network namespaces.
This patch effects the following 2 code paths
- when source and destination endpoints are on the same bridge and
  route short-circuiting is enabled. I guess you are not hitting
  this path as this is possible only if you specify 'rsc' flag when
  creating vxlan device.
- when source and destination endpoints belonging to different vni's
  are on 2 different bridges on the same host. encap bypass is done
  in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
  you must be hitting this path and the following patch should fix
  it by only doing bypass if the source and dest devices belong to 
  the same net. Can you try it and see if it fixes your tests?

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 9a64715..d53d8cb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1012,12 +1012,15 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	/* Bypass encapsulation if the destination is local */
-	if (rt->rt_flags & RTCF_LOCAL) {
+	/* Bypass encapsulation if the destination is local and in the same
+	   network namespace.
+	 */
+	if (net_eq(dev_net(dev), dev_net(rt->dst.dev)) &&
+	     rt->rt_flags & RTCF_LOCAL) {
 		struct vxlan_dev *dst_vxlan;
 
+		dst_vxlan = vxlan_find_vni(dev_net(rt->dst.dev), vni);
 		ip_rt_put(rt);
-		dst_vxlan = vxlan_find_vni(dev_net(dev), vni);
 		if (!dst_vxlan)
 			goto tx_error;
 		vxlan_encap_bypass(skb, vxlan, dst_vxlan);

  
Thanks
Sridhar

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-09 18:08 ` Sridhar Samudrala
@ 2013-04-10 14:29   ` Sergei Shtylyov
  2013-04-11  2:10   ` Cong Wang
  1 sibling, 0 replies; 15+ messages in thread
From: Sergei Shtylyov @ 2013-04-10 14:29 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: Cong Wang, netdev, David S. Miller

Hello.

On 09-04-2013 22:08, Sridhar Samudrala wrote:

>> From: Cong Wang <amwang@redhat.com>

>> This reverts commit 9dcc71e1fdbb7aa10d92a3d35e8a201adc84abd0.
>> It apparently breaks my vxlan tests between different namespaces.

> I haven't tried vxlan with network namespaces.
> This patch effects the following 2 code paths
> - when source and destination endpoints are on the same bridge and
>    route short-circuiting is enabled. I guess you are not hitting
>    this path as this is possible only if you specify 'rsc' flag when
>    creating vxlan device.
> - when source and destination endpoints belonging to different vni's
>    are on 2 different bridges on the same host. encap bypass is done
>    in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
>    you must be hitting this path and the following patch should fix
>    it by only doing bypass if the source and dest devices belong to
>    the same net. Can you try it and see if it fixes your tests?

> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 9a64715..d53d8cb 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1012,12 +1012,15 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>   		goto tx_error;
>   	}
>
> -	/* Bypass encapsulation if the destination is local */
> -	if (rt->rt_flags & RTCF_LOCAL) {
> +	/* Bypass encapsulation if the destination is local and in the same
> +	   network namespace.
> +	 */

    Note that the preferred multi-line comment style in the networking code is:

/* bla
  * bla
  */

WBR, Sergei

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-09 18:08 ` Sridhar Samudrala
  2013-04-10 14:29   ` Sergei Shtylyov
@ 2013-04-11  2:10   ` Cong Wang
  2013-04-11  4:53     ` Sridhar Samudrala
  1 sibling, 1 reply; 15+ messages in thread
From: Cong Wang @ 2013-04-11  2:10 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: netdev, David S. Miller

On Tue, 2013-04-09 at 11:08 -0700, Sridhar Samudrala wrote:
> On Tue, 2013-04-09 at 17:57 +0800, Cong Wang wrote:
> > From: Cong Wang <amwang@redhat.com>
> > 
> > This reverts commit 9dcc71e1fdbb7aa10d92a3d35e8a201adc84abd0.
> > It apparently breaks my vxlan tests between different namespaces.
> > 
> I haven't tried vxlan with network namespaces.
> This patch effects the following 2 code paths
> - when source and destination endpoints are on the same bridge and
>   route short-circuiting is enabled. I guess you are not hitting
>   this path as this is possible only if you specify 'rsc' flag when
>   creating vxlan device.

No, I didn't specify this flag.

> - when source and destination endpoints belonging to different vni's
>   are on 2 different bridges on the same host. encap bypass is done
>   in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
>   you must be hitting this path and the following patch should fix
>   it by only doing bypass if the source and dest devices belong to 
>   the same net. Can you try it and see if it fixes your tests?

I just tested it, unfortunately it doesn't work, the bug still exists.

If you need any other info, please let me know.

Thanks.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11  2:10   ` Cong Wang
@ 2013-04-11  4:53     ` Sridhar Samudrala
  2013-04-11  5:55       ` Cong Wang
  0 siblings, 1 reply; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-11  4:53 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, David S. Miller

On 4/10/2013 7:10 PM, Cong Wang wrote:
>> - when source and destination endpoints belonging to different vni's
>>    are on 2 different bridges on the same host. encap bypass is done
>>    in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
>>    you must be hitting this path and the following patch should fix
>>    it by only doing bypass if the source and dest devices belong to
>>    the same net. Can you try it and see if it fixes your tests?
> I just tested it, unfortunately it doesn't work, the bug still exists.
>
> If you need any other info, please let me know.
So does it mean that you are hitting the if condition that does encap 
bypass
even afterthe net_eq() check? Do the tests pass If you comment out the 
'if' block?

Can you share your test config/scripts so that i can try out your setup if
it is not toocomplicated?

Thanks
Sridhar

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11  4:53     ` Sridhar Samudrala
@ 2013-04-11  5:55       ` Cong Wang
  2013-04-11  6:33         ` Sridhar Samudrala
  2013-04-11 23:59         ` Sridhar Samudrala
  0 siblings, 2 replies; 15+ messages in thread
From: Cong Wang @ 2013-04-11  5:55 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: netdev, David S. Miller



----- Original Message -----
> On 4/10/2013 7:10 PM, Cong Wang wrote:
> >> - when source and destination endpoints belonging to different vni's
> >>    are on 2 different bridges on the same host. encap bypass is done
> >>    in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
> >>    you must be hitting this path and the following patch should fix
> >>    it by only doing bypass if the source and dest devices belong to
> >>    the same net. Can you try it and see if it fixes your tests?
> > I just tested it, unfortunately it doesn't work, the bug still exists.
> >
> > If you need any other info, please let me know.
> So does it mean that you are hitting the if condition that does encap
> bypass
> even afterthe net_eq() check? Do the tests pass If you comment out the
> 'if' block?

Yes, after adding a printk inside the 'if' block, I got:

[   71.456329] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
[   71.596551] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
[   72.028574] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
[   72.436384] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
[   73.028576] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
[   73.185134] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
[   73.436582] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
[   74.184251] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0

It seems the dst dev is the dev which vxlan0 setup on, so
there is no way to know if the packet is targeted for a different netns
on the same host, at least I don't find such RTCF_* flag.

I'd propose to revert that commit partially:

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 9a64715..0847564 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1012,18 +1012,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                goto tx_error;
        }
 
-       /* Bypass encapsulation if the destination is local */
-       if (rt->rt_flags & RTCF_LOCAL) {
-               struct vxlan_dev *dst_vxlan;
-
-               ip_rt_put(rt);
-               dst_vxlan = vxlan_find_vni(dev_net(dev), vni);
-               if (!dst_vxlan)
-                       goto tx_error;
-               vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-               return NETDEV_TX_OK;
-       }
-
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);


> 
> Can you share your test config/scripts so that i can try out your setup if
> it is not toocomplicated?
> 


Sure, here is what I did:

1) create a veth pair: veth0 and veth1
2) create a new netns
3) move veth1 to the new netns
4) setup vxlan0 on veth0
5) setup vxlan0 on veth1 in the new netns
6) ping remote, that is the IP of the vxlan0 in new netns

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11  5:55       ` Cong Wang
@ 2013-04-11  6:33         ` Sridhar Samudrala
  2013-04-11 23:59         ` Sridhar Samudrala
  1 sibling, 0 replies; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-11  6:33 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, David S. Miller

On 4/10/2013 10:55 PM, Cong Wang wrote:
>
> ----- Original Message -----
>> On 4/10/2013 7:10 PM, Cong Wang wrote:
>>>> - when source and destination endpoints belonging to different vni's
>>>>     are on 2 different bridges on the same host. encap bypass is done
>>>>     in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
>>>>     you must be hitting this path and the following patch should fix
>>>>     it by only doing bypass if the source and dest devices belong to
>>>>     the same net. Can you try it and see if it fixes your tests?
>>> I just tested it, unfortunately it doesn't work, the bug still exists.
>>>
>>> If you need any other info, please let me know.
>> So does it mean that you are hitting the if condition that does encap
>> bypass
>> even afterthe net_eq() check? Do the tests pass If you comment out the
>> 'if' block?
> Yes, after adding a printk inside the 'if' block, I got:
>
> [   71.456329] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   71.596551] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   72.028574] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   72.436384] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   73.028576] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   73.185134] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   73.436582] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   74.184251] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
>
> It seems the dst dev is the dev which vxlan0 setup on, so
> there is no way to know if the packet is targeted for a different netns
> on the same host, at least I don't find such RTCF_* flag.
>
> I'd propose to revert that commit partially:
I think we should spend some more time to address this issue correctly.
Bypassing encap makes a significant improvement in performance when the 
dest.
endpoint is on the same host.
So is vxlan_encap_bypass() getting called or are you hitting goto tx_error?

>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 9a64715..0847564 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1012,18 +1012,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>                  goto tx_error;
>          }
>
> -       /* Bypass encapsulation if the destination is local */
> -       if (rt->rt_flags & RTCF_LOCAL) {
> -               struct vxlan_dev *dst_vxlan;
> -
> -               ip_rt_put(rt);
> -               dst_vxlan = vxlan_find_vni(dev_net(dev), vni);
> -               if (!dst_vxlan)
> -                       goto tx_error;
> -               vxlan_encap_bypass(skb, vxlan, dst_vxlan);
> -               return NETDEV_TX_OK;
> -       }
> -
>          memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>          IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
>                                IPSKB_REROUTED);
>
>
>> Can you share your test config/scripts so that i can try out your setup if
>> it is not toocomplicated?
>>
>
> Sure, here is what I did:
>
> 1) create a veth pair: veth0 and veth1
> 2) create a new netns
> 3) move veth1 to the new netns
> 4) setup vxlan0 on veth0
> 5) setup vxlan0 on veth1 in the new netns
> 6) ping remote, that is the IP of the vxlan0 in new netns
>
I am not all that familiar with creating netns and veth interfaces.
I guess we can do all this via 'ip' command.
Can you give me a script with the exact commands to do this setup?

Thanks
Sridhar

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11  5:55       ` Cong Wang
  2013-04-11  6:33         ` Sridhar Samudrala
@ 2013-04-11 23:59         ` Sridhar Samudrala
  2013-04-12  8:05           ` Cong Wang
  2013-04-12 19:19           ` David Miller
  1 sibling, 2 replies; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-11 23:59 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, David S. Miller

On Thu, 2013-04-11 at 01:55 -0400, Cong Wang wrote:
> 
> ----- Original Message -----
> > On 4/10/2013 7:10 PM, Cong Wang wrote:
> > >> - when source and destination endpoints belonging to different vni's
> > >>    are on 2 different bridges on the same host. encap bypass is done
> > >>    in this scenario by checking if rt_flags has RTCF_LOCAL set. I think
> > >>    you must be hitting this path and the following patch should fix
> > >>    it by only doing bypass if the source and dest devices belong to
> > >>    the same net. Can you try it and see if it fixes your tests?
> > > I just tested it, unfortunately it doesn't work, the bug still exists.
> > >
> > > If you need any other info, please let me know.
> > So does it mean that you are hitting the if condition that does encap
> > bypass
> > even afterthe net_eq() check? Do the tests pass If you comment out the
> > 'if' block?
> 
> Yes, after adding a printk inside the 'if' block, I got:
> 
> [   71.456329] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   71.596551] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   72.028574] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   72.436384] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   73.028576] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   73.185134] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> [   73.436582] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth1
> [   74.184251] vxlan: dev: vxlan0, dst: 224.8.8.8, dst dev: veth0
> 
> It seems the dst dev is the dev which vxlan0 setup on, so
> there is no way to know if the packet is targeted for a different netns
> on the same host, at least I don't find such RTCF_* flag.

OK. i was able to setup vxlan between 2 net namespaces and reproduce
the issue.

The following patch fixes the issue for me. can you try it out?

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 9a64715..d6509de 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1013,7 +1013,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	/* Bypass encapsulation if the destination is local */
-	if (rt->rt_flags & RTCF_LOCAL) {
+	if (rt->dst.dev->flags & IFF_LOOPBACK) {
 		struct vxlan_dev *dst_vxlan;
 
 		ip_rt_put(rt);

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11 23:59         ` Sridhar Samudrala
@ 2013-04-12  8:05           ` Cong Wang
  2013-04-12 19:19           ` David Miller
  1 sibling, 0 replies; 15+ messages in thread
From: Cong Wang @ 2013-04-12  8:05 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: netdev, David S. Miller



----- Original Message -----
> 
> The following patch fixes the issue for me. can you try it out?
> 
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 9a64715..d6509de 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1013,7 +1013,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb,
> struct net_device *dev,
>  	}
>  
>  	/* Bypass encapsulation if the destination is local */
> -	if (rt->rt_flags & RTCF_LOCAL) {
> +	if (rt->dst.dev->flags & IFF_LOOPBACK) {
>  		struct vxlan_dev *dst_vxlan;
>  
>  		ip_rt_put(rt);
> 

It almost surely can fix the problem, but do you really just want to bypass
encap for loopback devcie? Not all local devices?

The title of your original commit "vxlan: Bypass encapsulation if the destination is local"
is confusing...

(Sorry for the delay, I am on vacation.)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-11 23:59         ` Sridhar Samudrala
  2013-04-12  8:05           ` Cong Wang
@ 2013-04-12 19:19           ` David Miller
  2013-04-12 23:07             ` Sridhar Samudrala
  1 sibling, 1 reply; 15+ messages in thread
From: David Miller @ 2013-04-12 19:19 UTC (permalink / raw)
  To: sri; +Cc: amwang, netdev

From: Sridhar Samudrala <sri@us.ibm.com>
Date: Thu, 11 Apr 2013 16:59:59 -0700

> @@ -1013,7 +1013,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>  	}
>  
>  	/* Bypass encapsulation if the destination is local */
> -	if (rt->rt_flags & RTCF_LOCAL) {
> +	if (rt->dst.dev->flags & IFF_LOOPBACK) {
>  		struct vxlan_dev *dst_vxlan;

This looks terrible, and ad-hoc.  You need to find a cleaner
way to express exactly what you're trying to do otherwise I'm
reverting your change.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-12 19:19           ` David Miller
@ 2013-04-12 23:07             ` Sridhar Samudrala
  2013-04-12 23:17               ` David Miller
  2013-04-15  2:31               ` Cong Wang
  0 siblings, 2 replies; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-12 23:07 UTC (permalink / raw)
  To: David Miller; +Cc: amwang, netdev

On 4/12/2013 12:19 PM, David Miller wrote:
> From: Sridhar Samudrala <sri@us.ibm.com>
> Date: Thu, 11 Apr 2013 16:59:59 -0700
>
>> @@ -1013,7 +1013,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>>   	}
>>   
>>   	/* Bypass encapsulation if the destination is local */
>> -	if (rt->rt_flags & RTCF_LOCAL) {
>> +	if (rt->dst.dev->flags & IFF_LOOPBACK) {
>>   		struct vxlan_dev *dst_vxlan;
> This looks terrible, and ad-hoc.  You need to find a cleaner
> way to express exactly what you're trying to do otherwise I'm
> reverting your change.
The idea is to bypass encap if the destination vxlan endpoint is on the same
host. To do this, i thought it is good enough to check if the route to reach
the destination ip is a local route.(RTCF_LOCAL is set).
This works fine for all the cases where destination ip is assigned to a
normal ethernet device. rt->dst.dev points to loopback device.

In case of veth(veth0,veth1), where the peer(veth1) and the destination 
vxlan
endpoint are on a different netns, ip_route_output for veth1's ipis 
returning
a route entry that has RTCF_LOCAL set and rt->dst.dev pointing to veth0.
Is it a bug that RTCF_LOCAL is set here when veth0's peer is moved to a
different netns?
We don't want to bypass encap in this scenario.

To address this behavior seen with veth, i had to change the if 
condition to
check for rt->dst.dev->flags rather than rt->rt_flags.

Thanks
Sridhar

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-12 23:07             ` Sridhar Samudrala
@ 2013-04-12 23:17               ` David Miller
  2013-04-15  2:31               ` Cong Wang
  1 sibling, 0 replies; 15+ messages in thread
From: David Miller @ 2013-04-12 23:17 UTC (permalink / raw)
  To: sri; +Cc: amwang, netdev

From: Sridhar Samudrala <sri@us.ibm.com>
Date: Fri, 12 Apr 2013 16:07:41 -0700

> Is it a bug that RTCF_LOCAL is set here when veth0's peer is moved
> to a different netns?

Nope.

RTCF_LOCAL is set in several situations.

For example, it would be set on a broadcast route for a subnet we are
on.  It'd also be set on a multicast route for a multicast group the
local system is subscribed to.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-12 23:07             ` Sridhar Samudrala
  2013-04-12 23:17               ` David Miller
@ 2013-04-15  2:31               ` Cong Wang
  2013-04-15  4:20                 ` Sridhar Samudrala
  1 sibling, 1 reply; 15+ messages in thread
From: Cong Wang @ 2013-04-15  2:31 UTC (permalink / raw)
  To: Sridhar Samudrala; +Cc: David Miller, netdev

On Fri, 2013-04-12 at 16:07 -0700, Sridhar Samudrala wrote:
> To address this behavior seen with veth, i had to change the if 
> condition to
> check for rt->dst.dev->flags rather than rt->rt_flags. 

There is no specific IFF_ flag for veth, nor I think introducing one is
a correct fix.

I think we can just revert it for now, since it is not very easy to fix.
You can, of course, send a bug-free version later. This regression
blocks my VXLAN IPv6 support patches, by the way.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local"
  2013-04-15  2:31               ` Cong Wang
@ 2013-04-15  4:20                 ` Sridhar Samudrala
  0 siblings, 0 replies; 15+ messages in thread
From: Sridhar Samudrala @ 2013-04-15  4:20 UTC (permalink / raw)
  To: Cong Wang; +Cc: Sridhar Samudrala, David Miller, netdev, mike.rapoport

On 4/14/2013 7:31 PM, Cong Wang wrote:
> On Fri, 2013-04-12 at 16:07 -0700, Sridhar Samudrala wrote:
>> To address this behavior seen with veth, i had to change the if
>> condition to
>> check for rt->dst.dev->flags rather than rt->rt_flags.
> There is no specific IFF_ flag for veth, nor I think introducing one is
> a correct fix.
>
> I think we can just revert it for now, since it is not very easy to fix.
> You can, of course, send a bug-free version later. This regression
> blocks my VXLAN IPv6 support patches, by the way.
>
>
I am not suggesting adding a new IFF_ flag for veth. I was referring to the
IFF_LOOPBACKflag and it should work fine for your setup.

However, i think Mike Rapaport't patch that adds a check to test for 
multicast/
broadcast routeisa better fix. Did you try that patch?
I tried it in my setup using both vxlan/DOVE configuration between VMs 
in the
same networknamespace on 2 different bridges as well as the veth 
configuration
between 2 network namespsaces.

Could you try his patch in your configuration. I think it will work and 
if so
we should go withthat patch.

Thanks
Sridhar

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-04-15  4:20 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-09  9:57 [Patch net-next] vxlan: revert "vxlan: Bypass encapsulation if the destination is local" Cong Wang
2013-04-09 17:16 ` David Miller
2013-04-09 18:08 ` Sridhar Samudrala
2013-04-10 14:29   ` Sergei Shtylyov
2013-04-11  2:10   ` Cong Wang
2013-04-11  4:53     ` Sridhar Samudrala
2013-04-11  5:55       ` Cong Wang
2013-04-11  6:33         ` Sridhar Samudrala
2013-04-11 23:59         ` Sridhar Samudrala
2013-04-12  8:05           ` Cong Wang
2013-04-12 19:19           ` David Miller
2013-04-12 23:07             ` Sridhar Samudrala
2013-04-12 23:17               ` David Miller
2013-04-15  2:31               ` Cong Wang
2013-04-15  4:20                 ` Sridhar Samudrala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.