All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ipv6: Fix the pmtu path for connected UDP socket
@ 2016-03-14 20:59 Wei Wang
  2016-03-16 23:53 ` David Miller
  2016-03-18  7:17 ` Martin KaFai Lau
  0 siblings, 2 replies; 32+ messages in thread
From: Wei Wang @ 2016-03-14 20:59 UTC (permalink / raw)
  To: David S . Miller; +Cc: Eric Dumazet, netdev, Wei Wang

From: Wei Wang <weiwan@google.com>

When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
the new mtu value is not properly updated in the dst_entry associated
with the socket.
This leads to the issue that the mtu value returned by getsockopt(sockfd,
IPPROTO_IPV6, IPV6_MTU, ...) is wrong.
The fix is to update sk->sk_dst_cache and other corresponding fields
when a new routing cache is allocated for the new pmtu in UDP connected
socket case.

Signed-off-by: Wei Wang <weiwan@google.com>
---
 include/net/ip6_route.h |  4 ++--
 net/ipv6/ah6.c          |  2 +-
 net/ipv6/esp6.c         |  2 +-
 net/ipv6/icmp.c         |  2 +-
 net/ipv6/ip6_vti.c      |  2 +-
 net/ipv6/ipcomp6.c      |  2 +-
 net/ipv6/route.c        | 21 +++++++++------------
 7 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 295d291..2b147a8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -115,8 +115,8 @@ void rt6_purge_dflt_routers(struct net *net);
 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		  const struct in6_addr *gwaddr);
 
-void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif,
-		     u32 mark);
+void ip6_update_pmtu(struct net *net, struct sock *sk, struct sk_buff *skb,
+		     __be32 mtu, int oif, u32 mark);
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu);
 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0630a4d5..2c926ec 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -664,7 +664,7 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(net, NULL, skb, info, 0, 0);
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 060a60b..b74847a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -476,7 +476,7 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(net, NULL, skb, info, 0, 0);
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0a37ddc..03816f5 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -92,7 +92,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct net *net = dev_net(skb->dev);
 
 	if (type == ICMPV6_PKT_TOOBIG)
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(net, NULL, skb, info, 0, 0);
 	else if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f..fa873ca 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -599,7 +599,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(net, NULL, skb, info, 0, 0);
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 1b9316e..c07a5ac 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -76,7 +76,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == NDISC_REDIRECT)
 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(net, NULL, skb, info, 0, 0);
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..8f6a5f1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1346,7 +1346,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
 }
 
-static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+static void __ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 				 const struct ipv6hdr *iph, u32 mtu)
 {
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
@@ -1377,12 +1377,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
-
-			/* ip6_ins_rt(nrt6) will bump the
-			 * rt6->rt6i_node->fn_sernum
-			 * which will fail the next rt6_check() and
-			 * invalidate the sk->sk_dst_cache.
-			 */
+			if (sk)
+				ip6_dst_store(sk, &nrt6->dst, daddr, saddr);
 			ip6_ins_rt(nrt6);
 		}
 	}
@@ -1394,8 +1390,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
 }
 
-void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
-		     int oif, u32 mark)
+void ip6_update_pmtu(struct net *net, struct sock *sk,
+		     struct sk_buff *skb, __be32 mtu, int oif, u32 mark)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
@@ -1410,15 +1406,16 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+		__ip6_rt_update_pmtu(dst, sk, iph, ntohl(mtu));
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
-	ip6_update_pmtu(skb, sock_net(sk), mtu,
-			sk->sk_bound_dev_if, sk->sk_mark);
+	ip6_update_pmtu(sock_net(sk),
+			(sk->sk_state != TCP_ESTABLISHED) ? NULL : sk,
+			skb, mtu, sk->sk_bound_dev_if, sk->sk_mark);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-14 20:59 [PATCH] ipv6: Fix the pmtu path for connected UDP socket Wei Wang
@ 2016-03-16 23:53 ` David Miller
  2016-03-17  0:22   ` Eric Dumazet
  2016-03-18  7:17 ` Martin KaFai Lau
  1 sibling, 1 reply; 32+ messages in thread
From: David Miller @ 2016-03-16 23:53 UTC (permalink / raw)
  To: weiwan; +Cc: edumazet, netdev

From: Wei Wang <weiwan@google.com>
Date: Mon, 14 Mar 2016 13:59:47 -0700

> From: Wei Wang <weiwan@google.com>
> 
> When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
> the new mtu value is not properly updated in the dst_entry associated
> with the socket.
> This leads to the issue that the mtu value returned by getsockopt(sockfd,
> IPPROTO_IPV6, IPV6_MTU, ...) is wrong.
> The fix is to update sk->sk_dst_cache and other corresponding fields
> when a new routing cache is allocated for the new pmtu in UDP connected
> socket case.
> 
> Signed-off-by: Wei Wang <weiwan@google.com>

Wait a second:

>  		if (nrt6) {
>  			rt6_do_update_pmtu(nrt6, mtu);
> -
> -			/* ip6_ins_rt(nrt6) will bump the
> -			 * rt6->rt6i_node->fn_sernum
> -			 * which will fail the next rt6_check() and
> -			 * invalidate the sk->sk_dst_cache.
> -			 */
> +			if (sk)
> +				ip6_dst_store(sk, &nrt6->dst, daddr, saddr);
>  			ip6_ins_rt(nrt6);
>  		}
>  	}

I still haven't seen a satisfactory answer as to why the as-designed
invalidation mechanism using fn_sernum is not working.

If that's broken, then a lot of other things won't work properly
either.

I've read the ip6_ins_rt() code path several times, and it always
increments the serial number, and therefore the next dst->check() call
(which every cached route usage should invoke) should invalide
this socket's route and lookup the new one.

Why does this not work?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-16 23:53 ` David Miller
@ 2016-03-17  0:22   ` Eric Dumazet
  2016-03-17  2:38     ` David Miller
  0 siblings, 1 reply; 32+ messages in thread
From: Eric Dumazet @ 2016-03-17  0:22 UTC (permalink / raw)
  To: David Miller; +Cc: weiwan, edumazet, netdev

On Wed, 2016-03-16 at 19:53 -0400, David Miller wrote:
> From: Wei Wang <weiwan@google.com>
> Date: Mon, 14 Mar 2016 13:59:47 -0700
> 
> > From: Wei Wang <weiwan@google.com>
> > 
> > When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
> > the new mtu value is not properly updated in the dst_entry associated
> > with the socket.
> > This leads to the issue that the mtu value returned by getsockopt(sockfd,
> > IPPROTO_IPV6, IPV6_MTU, ...) is wrong.
> > The fix is to update sk->sk_dst_cache and other corresponding fields
> > when a new routing cache is allocated for the new pmtu in UDP connected
> > socket case.
> > 
> > Signed-off-by: Wei Wang <weiwan@google.com>
> 
> Wait a second:
> 
> >  		if (nrt6) {
> >  			rt6_do_update_pmtu(nrt6, mtu);
> > -
> > -			/* ip6_ins_rt(nrt6) will bump the
> > -			 * rt6->rt6i_node->fn_sernum
> > -			 * which will fail the next rt6_check() and
> > -			 * invalidate the sk->sk_dst_cache.
> > -			 */
> > +			if (sk)
> > +				ip6_dst_store(sk, &nrt6->dst, daddr, saddr);
> >  			ip6_ins_rt(nrt6);
> >  		}
> >  	}
> 
> I still haven't seen a satisfactory answer as to why the as-designed
> invalidation mechanism using fn_sernum is not working.
> 
> If that's broken, then a lot of other things won't work properly
> either.
> 
> I've read the ip6_ins_rt() code path several times, and it always
> increments the serial number, and therefore the next dst->check() call
> (which every cached route usage should invoke) should invalide
> this socket's route and lookup the new one.
> 
> Why does this not work?


One of the issue is that IPV6_MTU getsockopt() will not check the dst,
but simply use __sk_dst_get() : It will then report old mtu.

Not sure we want to use the full check and then if dst appears to be
obsolete, do another route lookup ?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-17  0:22   ` Eric Dumazet
@ 2016-03-17  2:38     ` David Miller
  2016-03-17  3:34       ` Eric Dumazet
  0 siblings, 1 reply; 32+ messages in thread
From: David Miller @ 2016-03-17  2:38 UTC (permalink / raw)
  To: eric.dumazet; +Cc: weiwan, edumazet, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 16 Mar 2016 17:22:07 -0700

> One of the issue is that IPV6_MTU getsockopt() will not check the dst,
> but simply use __sk_dst_get() : It will then report old mtu.

That's a bug.

ipv4 does it right with a proper sk_dst_get() and so should
ipv6.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-17  2:38     ` David Miller
@ 2016-03-17  3:34       ` Eric Dumazet
  2016-03-18 17:19         ` Cong Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Eric Dumazet @ 2016-03-17  3:34 UTC (permalink / raw)
  To: David Miller; +Cc: weiwan, edumazet, netdev

On Wed, 2016-03-16 at 22:38 -0400, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Wed, 16 Mar 2016 17:22:07 -0700
> 
> > One of the issue is that IPV6_MTU getsockopt() will not check the dst,
> > but simply use __sk_dst_get() : It will then report old mtu.
> 
> That's a bug.
> 
> ipv4 does it right with a proper sk_dst_get() and so should
> ipv6.

Using rcu + __sk_dst_get() (in IPv6) is absolutely equivalent to
sk_dst_get() + dst_release() (in IPv4), modulo atomic ops on dst
refcnt...

Presumably IPv6 implementation using rcu is slightly better if this
getsockopt() is badly needed, but apparently nobody cares.

sk_dst_check() is a different beast.

The problem is that dst_mtu(dst) is not able to perform a route lookup
by itself.

Do we really want to use ip6_sk_dst_lookup_flow() and its associated
setup for this IP6_MTU thing, and maybe other points we might use an
obsolete dst ?

Looking at the complexity of udpv6_sendmsg() and rawv6_sendmsg() I
really wonder if it is worth extracting the route logic.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-14 20:59 [PATCH] ipv6: Fix the pmtu path for connected UDP socket Wei Wang
  2016-03-16 23:53 ` David Miller
@ 2016-03-18  7:17 ` Martin KaFai Lau
  1 sibling, 0 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-18  7:17 UTC (permalink / raw)
  To: Wei Wang; +Cc: David S . Miller, Eric Dumazet, netdev

On Mon, Mar 14, 2016 at 01:59:47PM -0700, Wei Wang wrote:
> From: Wei Wang <weiwan@google.com>
>
> When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
> the new mtu value is not properly updated in the dst_entry associated
> with the socket.
A nit picking, the new mtu value cannot always be set directly
to the current dst_entry associated with the socket (i.e. sk->sk_dst_cache).
In this case, a RTF_CACHE clone has to be created.

The problem could be better understood if the commit message was like:
"After creating the RTF_CACHE clone (with the new mtu value),
sk->sk_dst_cache is not _immediately_ set to this RTF_CACHE clone.
getsockopt(IPV6_MTU) does not do a dst_check() first.  Hence,
if there was no outgoing message to trigger the dst_check() invalidation
logic, it may return the stale mtu value."

> -void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif,
> -		     u32 mark);
> +void ip6_update_pmtu(struct net *net, struct sock *sk, struct sk_buff *skb,
> +		     __be32 mtu, int oif, u32 mark);
>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu);
This change seems to make the API a bit confusing.  The none _sk_ version
is also taking a sk param now.

> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1346,7 +1346,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
>  		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
>  }
>
> -static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
> +static void __ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
>  				 const struct ipv6hdr *iph, u32 mtu)
>  {
>  	struct rt6_info *rt6 = (struct rt6_info *)dst;
> @@ -1377,12 +1377,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
>  		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
>  		if (nrt6) {
>  			rt6_do_update_pmtu(nrt6, mtu);
> -
> -			/* ip6_ins_rt(nrt6) will bump the
> -			 * rt6->rt6i_node->fn_sernum
> -			 * which will fail the next rt6_check() and
> -			 * invalidate the sk->sk_dst_cache.
> -			 */
> +			if (sk)
> +				ip6_dst_store(sk, &nrt6->dst, daddr, saddr);
daddr/saddr could be from iph which is from skb.  Considering skb could be
gone, are they suitable to be set in np->daddr_cache and np->saddr_cache?

After looking at this patch, I like your last patch more because this
problem seems to be limited to the connected udp socket only (?) and
udp knows better on what to pass to ip6_dst_store().  Feeling bad
now about steering you to this direction :(

Thanks,
-- Martin

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-17  3:34       ` Eric Dumazet
@ 2016-03-18 17:19         ` Cong Wang
       [not found]           ` <CAEA6p_CfDYYLf-s0pUy7VNFH+46PDZSXNzUExXRAM_xv2w8tcw@mail.gmail.com>
  0 siblings, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-18 17:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, weiwan, Eric Dumazet, Linux Kernel Network Developers

On Wed, Mar 16, 2016 at 8:34 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2016-03-16 at 22:38 -0400, David Miller wrote:
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Wed, 16 Mar 2016 17:22:07 -0700
>>
>> > One of the issue is that IPV6_MTU getsockopt() will not check the dst,
>> > but simply use __sk_dst_get() : It will then report old mtu.
>>
>> That's a bug.
>>
>> ipv4 does it right with a proper sk_dst_get() and so should
>> ipv6.
>
> Using rcu + __sk_dst_get() (in IPv6) is absolutely equivalent to
> sk_dst_get() + dst_release() (in IPv4), modulo atomic ops on dst
> refcnt...
>
> Presumably IPv6 implementation using rcu is slightly better if this
> getsockopt() is badly needed, but apparently nobody cares.
>
> sk_dst_check() is a different beast.
>
> The problem is that dst_mtu(dst) is not able to perform a route lookup
> by itself.
>
> Do we really want to use ip6_sk_dst_lookup_flow() and its associated
> setup for this IP6_MTU thing, and maybe other points we might use an
> obsolete dst ?
>
> Looking at the complexity of udpv6_sendmsg() and rawv6_sendmsg() I
> really wonder if it is worth extracting the route logic.

I think the reason why IPv4 does this correctly is ipv4_sk_update_pmtu()
takes care of the sk cached dst, while the similar part for IPv6,
ip6_sk_update_pmtu() does not that. As Martin already points it out
in the previous version.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
       [not found]           ` <CAEA6p_CfDYYLf-s0pUy7VNFH+46PDZSXNzUExXRAM_xv2w8tcw@mail.gmail.com>
@ 2016-03-18 18:10             ` Cong Wang
  2016-03-18 21:26               ` Wei Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-18 18:10 UTC (permalink / raw)
  To: Wei Wang
  Cc: Martin KaFai Lau, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Fri, Mar 18, 2016 at 10:45 AM, Wei Wang <weiwan@google.com> wrote:
> Thanks all for the comments.
>
> Cong, you are right that for ipv6, the code does not take care of updating
> sk_dst_cache entry like Ipv4. And this patch is updating the entry by
> calling ip6_dst_store() to achieve similar functionality as Ipv4.

My question is why not updating ip6_sk_update_pmtu() to sync with ipv4?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-18 18:10             ` Cong Wang
@ 2016-03-18 21:26               ` Wei Wang
  2016-03-18 22:09                 ` Cong Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Wei Wang @ 2016-03-18 21:26 UTC (permalink / raw)
  To: Cong Wang
  Cc: Wei Wang, Martin KaFai Lau, Eric Dumazet, David Miller,
	Eric Dumazet, Linux Kernel Network Developers

I don't think ip6_sk_update_pmtu() is a good place to put it as all it
does is to call ip6_update_pmtu(). And ip6_update_pmtu() does the
route lookup and call __ip6_rt_update_pmtu.
We can put it in ip6_update_pmtu(). But that still means we need to
pass sk to ip6_update_pmtu() and I don't think it makes any difference
compared to the current fix.

Thanks.
Wei

On Fri, Mar 18, 2016 at 11:10 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Fri, Mar 18, 2016 at 10:45 AM, Wei Wang <weiwan@google.com> wrote:
>> Thanks all for the comments.
>>
>> Cong, you are right that for ipv6, the code does not take care of updating
>> sk_dst_cache entry like Ipv4. And this patch is updating the entry by
>> calling ip6_dst_store() to achieve similar functionality as Ipv4.
>
> My question is why not updating ip6_sk_update_pmtu() to sync with ipv4?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-18 21:26               ` Wei Wang
@ 2016-03-18 22:09                 ` Cong Wang
  2016-03-21 17:13                   ` Wei Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-18 22:09 UTC (permalink / raw)
  To: Wei Wang
  Cc: Wei Wang, Martin KaFai Lau, Eric Dumazet, David Miller,
	Eric Dumazet, Linux Kernel Network Developers

On Fri, Mar 18, 2016 at 2:26 PM, Wei Wang <tracywwnj@gmail.com> wrote:
> I don't think ip6_sk_update_pmtu() is a good place to put it as all it
> does is to call ip6_update_pmtu(). And ip6_update_pmtu() does the
> route lookup and call __ip6_rt_update_pmtu.
> We can put it in ip6_update_pmtu(). But that still means we need to
> pass sk to ip6_update_pmtu() and I don't think it makes any difference
> compared to the current fix.
>

Well, your patch touches all the callers of ip6_update_pmtu() , if you just
fix ip6_sk_update_pmtu() as I suggested, you only need to change one
function, ideally. And the ipv4 code is there, although I am not sure, it
looks like we can just mimic the logic here:

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..b88c2ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1417,8 +1417,28 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);

 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
-       ip6_update_pmtu(skb, sock_net(sk), mtu,
-                       sk->sk_bound_dev_if, sk->sk_mark);
+       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+       struct net *net = sock_net(sk);
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+
+       bh_lock_sock(sk);
+
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_oif = sk->sk_bound_dev_if;
+       fl6.flowi6_mark = sk->sk_mark ? : IP6_REPLY_MARK(net, skb->mark);
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = ip6_flowinfo(iph);
+
+       dst = ip6_route_output(net, NULL, &fl6);
+       if (!dst->error)
+               __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+
+       sk_dst_set(sk, &rt->dst);
+       bh_unlock_sock(sk);
+
+       dst_release(dst);
 }


Please don't judge me on the code, it could still miss a lot of things,
but it can show my idea...

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-18 22:09                 ` Cong Wang
@ 2016-03-21 17:13                   ` Wei Wang
  2016-03-22  6:02                     ` Martin KaFai Lau
  0 siblings, 1 reply; 32+ messages in thread
From: Wei Wang @ 2016-03-21 17:13 UTC (permalink / raw)
  To: Cong Wang
  Cc: Wei Wang, Martin KaFai Lau, Eric Dumazet, David Miller,
	Eric Dumazet, Linux Kernel Network Developers

Hey Cong,

This solution probably will not work.
First of all, if you look into __ip6_rt_update_pmtu(), it creates a
new dst and this dst does not get passed back to its caller. So unless
we tweak this function to pass the new dst back, we can only update
sk->sk_dst_cache inside the function itself.
Secondly, ip6_update_pmtu is called in multiple places. Not only here.
I am not sure how many places need to be changed like this. It seems
not a good thing to do.

Thanks.
Wei

On Fri, Mar 18, 2016 at 3:09 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Fri, Mar 18, 2016 at 2:26 PM, Wei Wang <tracywwnj@gmail.com> wrote:
>> I don't think ip6_sk_update_pmtu() is a good place to put it as all it
>> does is to call ip6_update_pmtu(). And ip6_update_pmtu() does the
>> route lookup and call __ip6_rt_update_pmtu.
>> We can put it in ip6_update_pmtu(). But that still means we need to
>> pass sk to ip6_update_pmtu() and I don't think it makes any difference
>> compared to the current fix.
>>
>
> Well, your patch touches all the callers of ip6_update_pmtu() , if you just
> fix ip6_sk_update_pmtu() as I suggested, you only need to change one
> function, ideally. And the ipv4 code is there, although I am not sure, it
> looks like we can just mimic the logic here:
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index ed44663..b88c2ff 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1417,8 +1417,28 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
>
>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>  {
> -       ip6_update_pmtu(skb, sock_net(sk), mtu,
> -                       sk->sk_bound_dev_if, sk->sk_mark);
> +       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
> +       struct net *net = sock_net(sk);
> +       struct dst_entry *dst;
> +       struct flowi6 fl6;
> +
> +       bh_lock_sock(sk);
> +
> +       memset(&fl6, 0, sizeof(fl6));
> +       fl6.flowi6_oif = sk->sk_bound_dev_if;
> +       fl6.flowi6_mark = sk->sk_mark ? : IP6_REPLY_MARK(net, skb->mark);
> +       fl6.daddr = iph->daddr;
> +       fl6.saddr = iph->saddr;
> +       fl6.flowlabel = ip6_flowinfo(iph);
> +
> +       dst = ip6_route_output(net, NULL, &fl6);
> +       if (!dst->error)
> +               __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
> +
> +       sk_dst_set(sk, &rt->dst);
> +       bh_unlock_sock(sk);
> +
> +       dst_release(dst);
>  }
>
>
> Please don't judge me on the code, it could still miss a lot of things,
> but it can show my idea...

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-21 17:13                   ` Wei Wang
@ 2016-03-22  6:02                     ` Martin KaFai Lau
  2016-03-22 16:53                       ` Cong Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-22  6:02 UTC (permalink / raw)
  To: Wei Wang
  Cc: Cong Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Mon, Mar 21, 2016 at 10:13:41AM -0700, Wei Wang wrote:
> Hey Cong,
>
> This solution probably will not work.
> First of all, if you look into __ip6_rt_update_pmtu(), it creates a
> new dst and this dst does not get passed back to its caller. So unless
> we tweak this function to pass the new dst back, we can only update
> sk->sk_dst_cache inside the function itself.
> Secondly, ip6_update_pmtu is called in multiple places. Not only here.
> I am not sure how many places need to be changed like this. It seems
> not a good thing to do.
>
> Thanks.
> Wei
>
> On Fri, Mar 18, 2016 at 3:09 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> > On Fri, Mar 18, 2016 at 2:26 PM, Wei Wang <tracywwnj@gmail.com> wrote:
> >> I don't think ip6_sk_update_pmtu() is a good place to put it as all it
> >> does is to call ip6_update_pmtu(). And ip6_update_pmtu() does the
> >> route lookup and call __ip6_rt_update_pmtu.
> >> We can put it in ip6_update_pmtu(). But that still means we need to
> >> pass sk to ip6_update_pmtu() and I don't think it makes any difference
> >> compared to the current fix.
I think Cong Wang is suggesting, in ip6_sk_update_pmtu():
1. call ip6_upate_pmtu() as it is
2. do a dst_check()
3. re-lookup() if it is invalid
4. and then do a ip6_dst_store()/dst_set

The above is exactly what inet6_csk_update_pmtu(), which was also used in the
first patch, is doing.

In term of difference, AFAICT, the current patch is an optimization in the
sense that the update_pmtu() code path does not have to do a dst_check to
discover its sk->sk_dst_cache is invalid, and then do a relookup to find out
that the just created RTF_CACHE clone should be used.  To get this, it may
make more sense to remove all the relookup code together during update_pmtu().
Even if this slow path was to be optimized, should it be put in a
separate patch where net-next is a better candidate?

I think fixing it in __udp6_lib_err() or what Cong Wang is suggesting makes
more sense for a net branch fix.  If there is logic specific to connected-udp,
I would do it in the __udp6_lib_err() instead.  After looking at
udpv6_sendmsg() and how it calls ip6_dst_store(), may also need to be careful
what daddr and saddr should be passed to ip6_dst_store(), or at least a commit
message.  The first patch is essentially passing NULL to daddr and saddr
while the second patch seems passing something else.

> >>
> >
> > Well, your patch touches all the callers of ip6_update_pmtu() , if you just
> > fix ip6_sk_update_pmtu() as I suggested, you only need to change one
> > function, ideally. And the ipv4 code is there, although I am not sure, it
> > looks like we can just mimic the logic here:
> >
> > diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> > index ed44663..b88c2ff 100644
> > --- a/net/ipv6/route.c
> > +++ b/net/ipv6/route.c
> > @@ -1417,8 +1417,28 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
> >
> >  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
> >  {
> > -       ip6_update_pmtu(skb, sock_net(sk), mtu,
> > -                       sk->sk_bound_dev_if, sk->sk_mark);
> > +       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
> > +       struct net *net = sock_net(sk);
> > +       struct dst_entry *dst;
> > +       struct flowi6 fl6;
> > +
> > +       bh_lock_sock(sk);
> > +
> > +       memset(&fl6, 0, sizeof(fl6));
> > +       fl6.flowi6_oif = sk->sk_bound_dev_if;
> > +       fl6.flowi6_mark = sk->sk_mark ? : IP6_REPLY_MARK(net, skb->mark);
> > +       fl6.daddr = iph->daddr;
> > +       fl6.saddr = iph->saddr;
> > +       fl6.flowlabel = ip6_flowinfo(iph);
> > +
> > +       dst = ip6_route_output(net, NULL, &fl6);
> > +       if (!dst->error)
> > +               __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
> > +
> > +       sk_dst_set(sk, &rt->dst);
> > +       bh_unlock_sock(sk);
> > +
> > +       dst_release(dst);
> >  }
> >
> >
> > Please don't judge me on the code, it could still miss a lot of things,
> > but it can show my idea...

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22  6:02                     ` Martin KaFai Lau
@ 2016-03-22 16:53                       ` Cong Wang
  2016-03-22 17:39                         ` Martin KaFai Lau
  0 siblings, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-22 16:53 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Wei Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Mon, Mar 21, 2016 at 11:02 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> I think Cong Wang is suggesting, in ip6_sk_update_pmtu():
> 1. call ip6_upate_pmtu() as it is
> 2. do a dst_check()
> 3. re-lookup() if it is invalid
> 4. and then do a ip6_dst_store()/dst_set
>
> The above is exactly what inet6_csk_update_pmtu(), which was also used in the
> first patch, is doing.

Well, the reasons why I suggest to fix ip6_sk_update_pmtu() instead of anything
else (no matter __udp6_lib_err() in v1 or ip6_update_pmtu() in v2) are that:

1) Catch up with ipv4 part, which is ipv4_sk_update_pmtu()
2) Straight-forward, (ideally) no need to bother other irrelevant
callers like xfrm.

Functionally all the solutions should work to fix Wei's problem, we just need to
find which one is more elegant.


>
> In term of difference, AFAICT, the current patch is an optimization in the
> sense that the update_pmtu() code path does not have to do a dst_check to
> discover its sk->sk_dst_cache is invalid, and then do a relookup to find out
> that the just created RTF_CACHE clone should be used.  To get this, it may
> make more sense to remove all the relookup code together during update_pmtu().
> Even if this slow path was to be optimized, should it be put in a
> separate patch where net-next is a better candidate?
>

Speaking of RTF_CACHE, I am curious why you didn't use FIB next hop exception
as what ipv4 does to cache exceptions? This makes IPv6 has more gap with IPv4.
This is (almost) irrelevant to this patch.


> I think fixing it in __udp6_lib_err() or what Cong Wang is suggesting makes
> more sense for a net branch fix.  If there is logic specific to connected-udp,
> I would do it in the __udp6_lib_err() instead.  After looking at
> udpv6_sendmsg() and how it calls ip6_dst_store(), may also need to be careful
> what daddr and saddr should be passed to ip6_dst_store(), or at least a commit
> message.  The first patch is essentially passing NULL to daddr and saddr
> while the second patch seems passing something else.

Raw socket needs to fix too, we can't just fix __udp6_lib_err(), this is also
why fixing ip6_sk_update_pmtu() is better, its call path is better.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 16:53                       ` Cong Wang
@ 2016-03-22 17:39                         ` Martin KaFai Lau
  2016-03-22 18:03                           ` Wei Wang
  2016-03-22 18:19                           ` Cong Wang
  0 siblings, 2 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-22 17:39 UTC (permalink / raw)
  To: Cong Wang
  Cc: Wei Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Tue, Mar 22, 2016 at 09:53:35AM -0700, Cong Wang wrote:
> On Mon, Mar 21, 2016 at 11:02 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> > In term of difference, AFAICT, the current patch is an optimization in the
> > sense that the update_pmtu() code path does not have to do a dst_check to
> > discover its sk->sk_dst_cache is invalid, and then do a relookup to find out
> > that the just created RTF_CACHE clone should be used.  To get this, it may
> > make more sense to remove all the relookup code together during update_pmtu().
> > Even if this slow path was to be optimized, should it be put in a
> > separate patch where net-next is a better candidate?
> >
>
> Speaking of RTF_CACHE, I am curious why you didn't use FIB next hop exception
> as what ipv4 does to cache exceptions? This makes IPv6 has more gap with IPv4.
> This is (almost) irrelevant to this patch.
There are a few differences between IPv6 and IPv4.  Both in terms of
data structure and functionality.  The last 'RTF_CACHE on exception' patchset is one
step toward this direction. More patches are needed and are welcomed ;)

>
>
> > I think fixing it in __udp6_lib_err() or what Cong Wang is suggesting makes
> > more sense for a net branch fix.  If there is logic specific to connected-udp,
> > I would do it in the __udp6_lib_err() instead.  After looking at
> > udpv6_sendmsg() and how it calls ip6_dst_store(), may also need to be careful
> > what daddr and saddr should be passed to ip6_dst_store(), or at least a commit
> > message.  The first patch is essentially passing NULL to daddr and saddr
> > while the second patch seems passing something else.
>
> Raw socket needs to fix too, we can't just fix __udp6_lib_err(), this is also
> why fixing ip6_sk_update_pmtu() is better, its call path is better.
I don't see rawv6 socket is storing the dst.  I probably have overlooked it.  Can
you point it out?

Having said that, I don't feel strongly on any of the two places.  I think only
implementation can tell.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 17:39                         ` Martin KaFai Lau
@ 2016-03-22 18:03                           ` Wei Wang
  2016-03-22 18:16                             ` Martin KaFai Lau
  2016-03-22 20:13                             ` Cong Wang
  2016-03-22 18:19                           ` Cong Wang
  1 sibling, 2 replies; 32+ messages in thread
From: Wei Wang @ 2016-03-22 18:03 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Cong Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

Thanks Martin and Cong.

I guess then we are going with the following fix in ip6_sk_update_pmtu():
1. call ip6_upate_pmtu() as it is
2. do a dst_check()
3. re-lookup() if it is invalid
4. and then do a ip6_dst_store()/dst_set

But one thing here, we will have to generate the same flowi6 in both
ip6_sk_update_pmtu() as well as ip6_update_pmtu(). Is this considered
as a not clean enough fix?


On Tue, Mar 22, 2016 at 10:39 AM, Martin KaFai Lau <kafai@fb.com> wrote:
> On Tue, Mar 22, 2016 at 09:53:35AM -0700, Cong Wang wrote:
>> On Mon, Mar 21, 2016 at 11:02 PM, Martin KaFai Lau <kafai@fb.com> wrote:
>> > In term of difference, AFAICT, the current patch is an optimization in the
>> > sense that the update_pmtu() code path does not have to do a dst_check to
>> > discover its sk->sk_dst_cache is invalid, and then do a relookup to find out
>> > that the just created RTF_CACHE clone should be used.  To get this, it may
>> > make more sense to remove all the relookup code together during update_pmtu().
>> > Even if this slow path was to be optimized, should it be put in a
>> > separate patch where net-next is a better candidate?
>> >
>>
>> Speaking of RTF_CACHE, I am curious why you didn't use FIB next hop exception
>> as what ipv4 does to cache exceptions? This makes IPv6 has more gap with IPv4.
>> This is (almost) irrelevant to this patch.
> There are a few differences between IPv6 and IPv4.  Both in terms of
> data structure and functionality.  The last 'RTF_CACHE on exception' patchset is one
> step toward this direction. More patches are needed and are welcomed ;)
>
>>
>>
>> > I think fixing it in __udp6_lib_err() or what Cong Wang is suggesting makes
>> > more sense for a net branch fix.  If there is logic specific to connected-udp,
>> > I would do it in the __udp6_lib_err() instead.  After looking at
>> > udpv6_sendmsg() and how it calls ip6_dst_store(), may also need to be careful
>> > what daddr and saddr should be passed to ip6_dst_store(), or at least a commit
>> > message.  The first patch is essentially passing NULL to daddr and saddr
>> > while the second patch seems passing something else.
>>
>> Raw socket needs to fix too, we can't just fix __udp6_lib_err(), this is also
>> why fixing ip6_sk_update_pmtu() is better, its call path is better.
> I don't see rawv6 socket is storing the dst.  I probably have overlooked it.  Can
> you point it out?
>
> Having said that, I don't feel strongly on any of the two places.  I think only
> implementation can tell.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 18:03                           ` Wei Wang
@ 2016-03-22 18:16                             ` Martin KaFai Lau
  2016-03-22 20:13                             ` Cong Wang
  1 sibling, 0 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-22 18:16 UTC (permalink / raw)
  To: Wei Wang
  Cc: Cong Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Tue, Mar 22, 2016 at 11:03:29AM -0700, Wei Wang wrote:
> But one thing here, we will have to generate the same flowi6 in both
> ip6_sk_update_pmtu() as well as ip6_update_pmtu(). Is this considered
> as a not clean enough fix?
If they share common codes to build flowi6, can the common codes
be factored out?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 17:39                         ` Martin KaFai Lau
  2016-03-22 18:03                           ` Wei Wang
@ 2016-03-22 18:19                           ` Cong Wang
  1 sibling, 0 replies; 32+ messages in thread
From: Cong Wang @ 2016-03-22 18:19 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Wei Wang, Wei Wang, Eric Dumazet, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Tue, Mar 22, 2016 at 10:39 AM, Martin KaFai Lau <kafai@fb.com> wrote:
> On Tue, Mar 22, 2016 at 09:53:35AM -0700, Cong Wang wrote:
>> On Mon, Mar 21, 2016 at 11:02 PM, Martin KaFai Lau <kafai@fb.com> wrote:
>> > In term of difference, AFAICT, the current patch is an optimization in the
>> > sense that the update_pmtu() code path does not have to do a dst_check to
>> > discover its sk->sk_dst_cache is invalid, and then do a relookup to find out
>> > that the just created RTF_CACHE clone should be used.  To get this, it may
>> > make more sense to remove all the relookup code together during update_pmtu().
>> > Even if this slow path was to be optimized, should it be put in a
>> > separate patch where net-next is a better candidate?
>> >
>>
>> Speaking of RTF_CACHE, I am curious why you didn't use FIB next hop exception
>> as what ipv4 does to cache exceptions? This makes IPv6 has more gap with IPv4.
>> This is (almost) irrelevant to this patch.
> There are a few differences between IPv6 and IPv4.  Both in terms of
> data structure and functionality.  The last 'RTF_CACHE on exception' patchset is one
> step toward this direction. More patches are needed and are welcomed ;)

Sure, I will take a look at this once net-next is re-open.


>
>>
>>
>> > I think fixing it in __udp6_lib_err() or what Cong Wang is suggesting makes
>> > more sense for a net branch fix.  If there is logic specific to connected-udp,
>> > I would do it in the __udp6_lib_err() instead.  After looking at
>> > udpv6_sendmsg() and how it calls ip6_dst_store(), may also need to be careful
>> > what daddr and saddr should be passed to ip6_dst_store(), or at least a commit
>> > message.  The first patch is essentially passing NULL to daddr and saddr
>> > while the second patch seems passing something else.
>>
>> Raw socket needs to fix too, we can't just fix __udp6_lib_err(), this is also
>> why fixing ip6_sk_update_pmtu() is better, its call path is better.
> I don't see rawv6 socket is storing the dst.  I probably have overlooked it.  Can
> you point it out?


I thought sk->sk_dst_cache is generic to all sockets, but it is up to
each kind of
socket to decide to use it or not, and you are right, raw socket doesn't seem to
care about it even though it calls *_sk_update_pmtu().

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 18:03                           ` Wei Wang
  2016-03-22 18:16                             ` Martin KaFai Lau
@ 2016-03-22 20:13                             ` Cong Wang
  2016-03-22 23:36                               ` Eric Dumazet
  1 sibling, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-22 20:13 UTC (permalink / raw)
  To: Wei Wang
  Cc: Martin KaFai Lau, Wei Wang, Eric Dumazet, David Miller,
	Eric Dumazet, Linux Kernel Network Developers

[-- Attachment #1: Type: text/plain, Size: 429 bytes --]

On Tue, Mar 22, 2016 at 11:03 AM, Wei Wang <tracywwnj@gmail.com> wrote:
> Thanks Martin and Cong.
>
> I guess then we are going with the following fix in ip6_sk_update_pmtu():
> 1. call ip6_upate_pmtu() as it is
> 2. do a dst_check()
> 3. re-lookup() if it is invalid
> 4. and then do a ip6_dst_store()/dst_set

Exactly, please try the attached patch. Note I did nothing more than a
compile test.

Does it make sense to you now?

[-- Attachment #2: ipv6-route-pmtu.diff --]
[-- Type: text/plain, Size: 3368 bytes --]

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..fcea05e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1346,18 +1346,20 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
 }
 
-static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
-				 const struct ipv6hdr *iph, u32 mtu)
+static struct dst_entry* __ip6_rt_update_pmtu(struct dst_entry *dst,
+					      const struct sock *sk,
+					      const struct ipv6hdr *iph,
+					      u32 mtu, bool hold)
 {
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
 
 	if (rt6->rt6i_flags & RTF_LOCAL)
-		return;
+		return dst;
 
 	dst_confirm(dst);
 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
 	if (mtu >= dst_mtu(dst))
-		return;
+		return dst;
 
 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
 		rt6_do_update_pmtu(rt6, mtu);
@@ -1372,11 +1374,13 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 			daddr = &sk->sk_v6_daddr;
 			saddr = &inet6_sk(sk)->saddr;
 		} else {
-			return;
+			return dst;
 		}
 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
+			if (hold)
+				dst_hold(&nrt6->dst);
 
 			/* ip6_ins_rt(nrt6) will bump the
 			 * rt6->rt6i_node->fn_sernum
@@ -1384,14 +1388,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 			 * invalidate the sk->sk_dst_cache.
 			 */
 			ip6_ins_rt(nrt6);
+			return &nrt6->dst;
 		}
 	}
+
+	return dst;
 }
 
 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			       struct sk_buff *skb, u32 mtu)
 {
-	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, false);
 }
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@@ -1410,15 +1417,52 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), false);
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
-	ip6_update_pmtu(skb, sock_net(sk), mtu,
-			sk->sk_bound_dev_if, sk->sk_mark);
+	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+	struct net *net = sock_net(sk);
+	struct dst_entry *ndst, *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+
+	bh_lock_sock(sk);
+
+	fl6.flowi6_oif = sk->sk_bound_dev_if;
+	fl6.flowi6_mark = sk->sk_mark ? : IP6_REPLY_MARK(net, skb->mark);
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = ip6_flowinfo(iph);
+
+	dst = sk_dst_get(sk);
+	if (sock_owned_by_user(sk) || !dst) {
+		ip6_update_pmtu(skb, net, mtu, fl6.flowi6_oif, fl6.flowi6_mark);
+		goto out;
+	}
+
+	if (dst->obsolete && !dst->ops->check(dst, 0)) {
+		dst_release(dst);
+		dst = ip6_route_output(net, sk, &fl6);
+		if (dst->error)
+			goto out;
+	}
+
+	ndst = __ip6_rt_update_pmtu(dst, sk, iph, ntohl(mtu), true);
+	if (ndst != dst) {
+		dst_release(dst);
+		dst = ndst;
+	}
+
+	if (sk->sk_state == TCP_ESTABLISHED)
+		ip6_dst_store(sk, dst, &iph->daddr, &iph->saddr);
+out:
+	bh_unlock_sock(sk);
+	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
 

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 20:13                             ` Cong Wang
@ 2016-03-22 23:36                               ` Eric Dumazet
  2016-03-23 23:57                                 ` Wei Wang
  2016-03-24 19:23                                 ` Cong Wang
  0 siblings, 2 replies; 32+ messages in thread
From: Eric Dumazet @ 2016-03-22 23:36 UTC (permalink / raw)
  To: Cong Wang
  Cc: Wei Wang, Martin KaFai Lau, Wei Wang, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Tue, 2016-03-22 at 13:13 -0700, Cong Wang wrote:
> On Tue, Mar 22, 2016 at 11:03 AM, Wei Wang <tracywwnj@gmail.com> wrote:
> > Thanks Martin and Cong.
> >
> > I guess then we are going with the following fix in ip6_sk_update_pmtu():
> > 1. call ip6_upate_pmtu() as it is
> > 2. do a dst_check()
> > 3. re-lookup() if it is invalid
> > 4. and then do a ip6_dst_store()/dst_set
> 
> Exactly, please try the attached patch. Note I did nothing more than a
> compile test.
> 
> Does it make sense to you now?


Hard to reply on your patch as it was not inlined.

1) Lot of code duplication, for some reason I do not yet understand.

ip6_sk_update_pmtu() and ip6_update_pmtu() will basically do the same
thing...

2)

+       if (sk->sk_state == TCP_ESTABLISHED)
+               ip6_dst_store(sk, dst, &iph->daddr, &iph->saddr);
+out:


ip6_dst_store() will do :

np->daddr_cache = daddr;  (&iph->daddr)
np->saddr_cache = saddr;  (&iph->saddr)

So when skb is freed, daddr_cache & saddr_cache point to freed data.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 23:36                               ` Eric Dumazet
@ 2016-03-23 23:57                                 ` Wei Wang
  2016-03-24 18:32                                   ` Martin KaFai Lau
  2016-03-25 23:55                                   ` Martin KaFai Lau
  2016-03-24 19:23                                 ` Cong Wang
  1 sibling, 2 replies; 32+ messages in thread
From: Wei Wang @ 2016-03-23 23:57 UTC (permalink / raw)
  To: Eric Dumazet, Martin KaFai Lau, Cong Wang, Eric Dumazet
  Cc: Wei Wang, David Miller, Linux Kernel Network Developers

What about something like this:

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..21b4102 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1394,6 +1394,19 @@ static void ip6_rt_update_pmtu(struct dst_entry
*dst, struct sock *sk,
    __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
 }

+static void ip6_fill_in_flow(struct flowi6 *fl6,  struct net *net,
+                  struct sk_buff *skb, int oif, u32 mark)
+{
+   const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+
+   memset(fl6, 0, sizeof(fl6));
+   fl6->flowi6_oif = oif;
+   fl6->flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
+   fl6->daddr = iph->daddr;
+   fl6->saddr = iph->saddr;
+   fl6->flowlabel = ip6_flowinfo(iph);
+}
+
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
             int oif, u32 mark)
 {
@@ -1401,13 +1414,7 @@ void ip6_update_pmtu(struct sk_buff *skb,
struct net *net, __be32 mtu,
    struct dst_entry *dst;
    struct flowi6 fl6;

-   memset(&fl6, 0, sizeof(fl6));
-   fl6.flowi6_oif = oif;
-   fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
-   fl6.daddr = iph->daddr;
-   fl6.saddr = iph->saddr;
-   fl6.flowlabel = ip6_flowinfo(iph);
-
+   ip6_fill_in_flow(&fl6, net, skb, oif, mark);
    dst = ip6_route_output(net, NULL, &fl6);
    if (!dst->error)
        __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
@@ -1417,8 +1424,22 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);

 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
-   ip6_update_pmtu(skb, sock_net(sk), mtu,
+   struct ipv6_pinfo *np = inet6_sk(sk);
+   struct dst_entry *dst_new;
+   struct flowi6 fl6;
+   struct net *net = sock_net(sk);
+
+   ip6_update_pmtu(skb, net, mtu,
+           sk->sk_bound_dev_if, sk->sk_mark);
+
+   if (sk->sk_state == TCP_ESTABLISHED &&
+       !sk_dst_check(sk, np->dst_cookie)) {
+       ip6_fill_in_flow(&fl6, net, skb,
            sk->sk_bound_dev_if, sk->sk_mark);
+       dst_new = ip6_route_output(net, NULL, &fl6);
+       if (!IS_ERR(dst_new))
+           ip6_dst_store(sk, dst_new, NULL, NULL);
+   }
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);


Thanks.
Wei

On Tue, Mar 22, 2016 at 4:36 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-03-22 at 13:13 -0700, Cong Wang wrote:
>> On Tue, Mar 22, 2016 at 11:03 AM, Wei Wang <tracywwnj@gmail.com> wrote:
>> > Thanks Martin and Cong.
>> >
>> > I guess then we are going with the following fix in ip6_sk_update_pmtu():
>> > 1. call ip6_upate_pmtu() as it is
>> > 2. do a dst_check()
>> > 3. re-lookup() if it is invalid
>> > 4. and then do a ip6_dst_store()/dst_set
>>
>> Exactly, please try the attached patch. Note I did nothing more than a
>> compile test.
>>
>> Does it make sense to you now?
>
>
> Hard to reply on your patch as it was not inlined.
>
> 1) Lot of code duplication, for some reason I do not yet understand.
>
> ip6_sk_update_pmtu() and ip6_update_pmtu() will basically do the same
> thing...
>
> 2)
>
> +       if (sk->sk_state == TCP_ESTABLISHED)
> +               ip6_dst_store(sk, dst, &iph->daddr, &iph->saddr);
> +out:
>
>
> ip6_dst_store() will do :
>
> np->daddr_cache = daddr;  (&iph->daddr)
> np->saddr_cache = saddr;  (&iph->saddr)
>
> So when skb is freed, daddr_cache & saddr_cache point to freed data.
>
>
>
>

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-23 23:57                                 ` Wei Wang
@ 2016-03-24 18:32                                   ` Martin KaFai Lau
  2016-03-24 19:45                                     ` Martin KaFai Lau
  2016-03-25 23:55                                   ` Martin KaFai Lau
  1 sibling, 1 reply; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-24 18:32 UTC (permalink / raw)
  To: Wei Wang, Eric Dumazet
  Cc: Eric Dumazet, Cong Wang, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Wed, Mar 23, 2016 at 04:57:22PM -0700, Wei Wang wrote:
> What about something like this:
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index ed44663..21b4102 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1394,6 +1394,19 @@ static void ip6_rt_update_pmtu(struct dst_entry
> *dst, struct sock *sk,
>     __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
>  }
>
> +static void ip6_fill_in_flow(struct flowi6 *fl6,  struct net *net,
> +                  struct sk_buff *skb, int oif, u32 mark)
> +{
> +   const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
> +
> +   memset(fl6, 0, sizeof(fl6));
> +   fl6->flowi6_oif = oif;
> +   fl6->flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
> +   fl6->daddr = iph->daddr;
> +   fl6->saddr = iph->saddr;
> +   fl6->flowlabel = ip6_flowinfo(iph);
> +}
> +
>  void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
>              int oif, u32 mark)
>  {
> @@ -1401,13 +1414,7 @@ void ip6_update_pmtu(struct sk_buff *skb,
> struct net *net, __be32 mtu,
>     struct dst_entry *dst;
>     struct flowi6 fl6;
>
> -   memset(&fl6, 0, sizeof(fl6));
> -   fl6.flowi6_oif = oif;
> -   fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
> -   fl6.daddr = iph->daddr;
> -   fl6.saddr = iph->saddr;
> -   fl6.flowlabel = ip6_flowinfo(iph);
> -
> +   ip6_fill_in_flow(&fl6, net, skb, oif, mark);
>     dst = ip6_route_output(net, NULL, &fl6);
>     if (!dst->error)
>         __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
> @@ -1417,8 +1424,22 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
>
>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>  {
> -   ip6_update_pmtu(skb, sock_net(sk), mtu,
> +   struct ipv6_pinfo *np = inet6_sk(sk);
> +   struct dst_entry *dst_new;
> +   struct flowi6 fl6;
> +   struct net *net = sock_net(sk);
> +
> +   ip6_update_pmtu(skb, net, mtu,
> +           sk->sk_bound_dev_if, sk->sk_mark);
> +
> +   if (sk->sk_state == TCP_ESTABLISHED &&
> +       !sk_dst_check(sk, np->dst_cookie)) {
sk_dst_check could hold a refcnt to the current sk->sk_dst_cache.
It has to be released.

> +       ip6_fill_in_flow(&fl6, net, skb,
>             sk->sk_bound_dev_if, sk->sk_mark);
> +       dst_new = ip6_route_output(net, NULL, &fl6);
> +       if (!IS_ERR(dst_new))
> +           ip6_dst_store(sk, dst_new, NULL, NULL);
Has sk been locked before ip6_dst_store?

Some fast path may do __sk_dst_get() which does not refcnt the sk->sk_dst_cache.

However, after a quick look, it seems udpv6_sendmsg() is calling ip6_dst_store()
after release_sock also.  I could be missing something.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-22 23:36                               ` Eric Dumazet
  2016-03-23 23:57                                 ` Wei Wang
@ 2016-03-24 19:23                                 ` Cong Wang
  1 sibling, 0 replies; 32+ messages in thread
From: Cong Wang @ 2016-03-24 19:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Wei Wang, Martin KaFai Lau, Wei Wang, David Miller, Eric Dumazet,
	Linux Kernel Network Developers

On Tue, Mar 22, 2016 at 4:36 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-03-22 at 13:13 -0700, Cong Wang wrote:
>> On Tue, Mar 22, 2016 at 11:03 AM, Wei Wang <tracywwnj@gmail.com> wrote:
>> > Thanks Martin and Cong.
>> >
>> > I guess then we are going with the following fix in ip6_sk_update_pmtu():
>> > 1. call ip6_upate_pmtu() as it is
>> > 2. do a dst_check()
>> > 3. re-lookup() if it is invalid
>> > 4. and then do a ip6_dst_store()/dst_set
>>
>> Exactly, please try the attached patch. Note I did nothing more than a
>> compile test.
>>
>> Does it make sense to you now?
>
>
> Hard to reply on your patch as it was not inlined.

Sorry about that, it is still not ready to send out formally yet, because
I don't do any run-time tests yet.

>
> 1) Lot of code duplication, for some reason I do not yet understand.
>
> ip6_sk_update_pmtu() and ip6_update_pmtu() will basically do the same
> thing...

Hmm? After my patch ip6_sk_update_pmtu() will take care of sk_dst_cache,
but ip6_update_pmtu() doesn't do that since it has no 'sk' parameter.
It is pretty much similar to commit 9cb3a50c5f63.

>
> 2)
>
> +       if (sk->sk_state == TCP_ESTABLISHED)
> +               ip6_dst_store(sk, dst, &iph->daddr, &iph->saddr);
> +out:
>
>
> ip6_dst_store() will do :
>
> np->daddr_cache = daddr;  (&iph->daddr)
> np->saddr_cache = saddr;  (&iph->saddr)
>
> So when skb is freed, daddr_cache & saddr_cache point to freed data.

Ah, good catch, so ip6_dst_store() can only use the addr's stored
in socket or NULL.

I will send out 2 formal patches shortly.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-24 18:32                                   ` Martin KaFai Lau
@ 2016-03-24 19:45                                     ` Martin KaFai Lau
  0 siblings, 0 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-24 19:45 UTC (permalink / raw)
  To: Wei Wang, Eric Dumazet
  Cc: Eric Dumazet, Cong Wang, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Thu, Mar 24, 2016 at 11:32:28AM -0700, Martin KaFai Lau wrote:
> Some fast path may do __sk_dst_get() which does not refcnt the sk->sk_dst_cache.
Ignore this comment.  This should have been protected by rcu grace period.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-23 23:57                                 ` Wei Wang
  2016-03-24 18:32                                   ` Martin KaFai Lau
@ 2016-03-25 23:55                                   ` Martin KaFai Lau
  2016-03-26  0:16                                     ` Martin KaFai Lau
  1 sibling, 1 reply; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-25 23:55 UTC (permalink / raw)
  To: Wei Wang
  Cc: Eric Dumazet, Cong Wang, Eric Dumazet, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Wed, Mar 23, 2016 at 04:57:22PM -0700, Wei Wang wrote:
> What about something like this:
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index ed44663..21b4102 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1394,6 +1394,19 @@ static void ip6_rt_update_pmtu(struct dst_entry
> *dst, struct sock *sk,
>     __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
>  }
>
> +static void ip6_fill_in_flow(struct flowi6 *fl6,  struct net *net,
> +                  struct sk_buff *skb, int oif, u32 mark)
> +{
> +   const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
> +
> +   memset(fl6, 0, sizeof(fl6));
> +   fl6->flowi6_oif = oif;
> +   fl6->flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
> +   fl6->daddr = iph->daddr;
> +   fl6->saddr = iph->saddr;
> +   fl6->flowlabel = ip6_flowinfo(iph);
> +}
> +
>  void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
>              int oif, u32 mark)
>  {
> @@ -1401,13 +1414,7 @@ void ip6_update_pmtu(struct sk_buff *skb,
> struct net *net, __be32 mtu,
>     struct dst_entry *dst;
>     struct flowi6 fl6;
>
> -   memset(&fl6, 0, sizeof(fl6));
> -   fl6.flowi6_oif = oif;
> -   fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
> -   fl6.daddr = iph->daddr;
> -   fl6.saddr = iph->saddr;
> -   fl6.flowlabel = ip6_flowinfo(iph);
> -
> +   ip6_fill_in_flow(&fl6, net, skb, oif, mark);
>     dst = ip6_route_output(net, NULL, &fl6);
>     if (!dst->error)
>         __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
> @@ -1417,8 +1424,22 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
>
>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>  {
> -   ip6_update_pmtu(skb, sock_net(sk), mtu,
> +   struct ipv6_pinfo *np = inet6_sk(sk);
> +   struct dst_entry *dst_new;
> +   struct flowi6 fl6;
> +   struct net *net = sock_net(sk);
> +
> +   ip6_update_pmtu(skb, net, mtu,
> +           sk->sk_bound_dev_if, sk->sk_mark);
> +
> +   if (sk->sk_state == TCP_ESTABLISHED &&
> +       !sk_dst_check(sk, np->dst_cookie)) {
> +       ip6_fill_in_flow(&fl6, net, skb,
>             sk->sk_bound_dev_if, sk->sk_mark);
> +       dst_new = ip6_route_output(net, NULL, &fl6);
> +       if (!IS_ERR(dst_new))
> +           ip6_dst_store(sk, dst_new, NULL, NULL);
> +   }
>  }
>  EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

Here is the change I come up with (based on your flow key buildup function
with one fix), take a little different approach in ip6_sk_update_pmtu()
and some explanation on ip6_dst_store() after looking at ip6_rt_check()

I have not included the sk lock yet.  There is still a few things I don't fully
understand in another discussion thread.

[PATCH] ipv6: Update sk->sk_dst_cache in ip6_sk_update_pmtu()

There is a case in connected UDP socket such that
getsockopt(IPV6_MTU) will return a stale MTU value. The reproducible
sequence could be the following:
1. Create a connected UDP socket
2. Send some datagrams out
3. Receive a ICMPV6_PKT_TOOBIG
4. No new outgoing datagrams to trigger the sk_dst_check()
   logic to update the sk->sk_dst_cache.
5. getsockopt(IPV6_MTU) returns the mtu from the invalid
   sk->sk_dst_cache instead of the newly created RTF_CACHE clone which
   contains the MTU value instructed by the ICMPV6_PKT_TOOBIG message.

One observation is that, the __udp6_lib_err() code path does not do a
sk_dst_check() and route relookup (if needed) after doing the pmtu
update (while TCP does).

This patch does a ip6_dst_check() in ip6_sk_update_pmtu() which is used
by UDP to do the pmtu update and update sk->sk_dst_cache if it is needed.

ip6_dst_store(sk, ndst, NULL, NULL) is used.  NULL(s) are passed to
the daddr and saddr parameters for simplicity reason.  The current
use case is only for the UDP lookup.  The details is in ip6_rt_check().

At the point where ip6_dst_store(sk, ndst, NULL, NULL) is called in
this patch, ndst should be a RTF_CACHE clone (which implies
((struct rt6i_info *)ndst)->rt6i_dst.plen == 128) and ip6_rt_check()
will pass without even considering np->d/saddr_cache.

Even it has to use np->d/saddr_cache, the worst case is ip6_rt_check()
fails and causes one more route lookup.

Test:

Server (Connected UDP Socket):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Route Details:
[root@arch-fb-vm1 ~]# ip -6 r show | egrep '2fac'
2fac::/64 dev eth0  proto kernel  metric 256  pref medium
2fac:face::/64 via 2fac::face dev eth0  metric 1024  pref medium

A crappy python code to create a connected UDP socket:

import socket
import errno

HOST = '2fac::1'
PORT = 8080

s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
s.bind((HOST, PORT))
s.connect(('2fac:face::face', 53))
print("connected")
while True:
    try:
	data = s.recv(1024)
    except socket.error as se:
	if se.errno == errno.EMSGSIZE:
		pmtu = s.getsockopt(41, 24)
		print("PMTU:%d" % pmtu)
		break
s.close()

Python program output after getting a ICMPV6_PKT_TOOBIG:
[root@arch-fb-vm1 ~]# python2 ~/devshare/kernel/tasks/fib6/udp-connect-53-8080.py
connected
PMTU:1300

Cache routes after recieving TOOBIG:
[root@arch-fb-vm1 ~]# ip -6 r show table cache
2fac:face::face via 2fac::face dev eth0  metric 0
    cache  expires 463sec mtu 1300 pref medium

Client (Send the ICMPV6_PKT_TOOBIG):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
scapy is used to generate the TOOBIG message.  Here is the scrapy script I have
used:

>>> p=Ether(src='da:75:4d:36:ac:32', dst='52:54:00:12:34:66', type=0x86dd)/IPv6(src='2fac::face', dst='2fac::1')/ICMPv6PacketTooBig(mtu=1300)/IPv6(src='2fac::
1',dst='2fac:face::face', nh='UDP')/UDP(sport=8080,dport=53)
>>> sendp(p, iface='qemubr0')

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reported-by: Wei Wang <weiwan@google.com>
---
 net/ipv6/route.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed44663..06934ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1394,6 +1394,20 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
 }

+static void build_skb_flow_key(struct flowi6 *fl6,
+			       const struct sk_buff *skb,
+			       struct net *net, int oif, u32 mark)
+{
+	const struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
+
+	memset(fl6, 0, sizeof(*fl6));
+	fl6->flowi6_oif = oif;
+	fl6->flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
+	fl6->daddr = iph->daddr;
+	fl6->saddr = iph->saddr;
+	fl6->flowlabel = ip6_flowinfo(iph);
+}
+
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 		     int oif, u32 mark)
 {
@@ -1401,13 +1415,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 	struct dst_entry *dst;
 	struct flowi6 fl6;

-	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_oif = oif;
-	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
-	fl6.daddr = iph->daddr;
-	fl6.saddr = iph->saddr;
-	fl6.flowlabel = ip6_flowinfo(iph);
-
+	build_skb_flow_key(&fl6, skb, net, oif, mark);
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
@@ -1417,8 +1425,26 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);

 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
+	struct dst_entry *odst;
+
+	odst = sk_dst_get(sk);
+
 	ip6_update_pmtu(skb, sock_net(sk), mtu,
 			sk->sk_bound_dev_if, sk->sk_mark);
+
+	if (odst && !odst->error &&
+	    !ip6_dst_check(odst, inet6_sk(sk)->dst_cookie)) {
+		struct dst_entry *ndst;
+		struct flowi6 fl6;
+
+		build_skb_flow_key(&fl6, skb, sock_net(sk),
+				   sk->sk_bound_dev_if, sk->sk_mark);
+		ndst = ip6_route_output(sock_net(sk), NULL, &fl6);
+		if (!ndst->error)
+			ip6_dst_store(sk, ndst, NULL, NULL);
+	}
+
+	dst_release(odst);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-25 23:55                                   ` Martin KaFai Lau
@ 2016-03-26  0:16                                     ` Martin KaFai Lau
  2016-03-28 22:39                                       ` Cong Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-26  0:16 UTC (permalink / raw)
  To: Wei Wang
  Cc: Eric Dumazet, Cong Wang, Eric Dumazet, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Fri, Mar 25, 2016 at 04:55:27PM -0700, Martin KaFai Lau wrote:
>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>  {
> +	struct dst_entry *odst;
> +
> +	odst = sk_dst_get(sk);
> +
>  	ip6_update_pmtu(skb, sock_net(sk), mtu,
>  			sk->sk_bound_dev_if, sk->sk_mark);
> +
> +	if (odst && !odst->error &&
> +	    !ip6_dst_check(odst, inet6_sk(sk)->dst_cookie)) {
> +		struct dst_entry *ndst;
> +		struct flowi6 fl6;
> +
> +		build_skb_flow_key(&fl6, skb, sock_net(sk),
> +				   sk->sk_bound_dev_if, sk->sk_mark);
> +		ndst = ip6_route_output(sock_net(sk), NULL, &fl6);
> +		if (!ndst->error)
> +			ip6_dst_store(sk, ndst, NULL, NULL);
oops...missed:
		else
			dst_release(ndst);
> +	}
> +
> +	dst_release(odst);
>  }
>  EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
>
> --
> 2.5.1

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-26  0:16                                     ` Martin KaFai Lau
@ 2016-03-28 22:39                                       ` Cong Wang
  2016-03-28 23:44                                         ` Martin KaFai Lau
  0 siblings, 1 reply; 32+ messages in thread
From: Cong Wang @ 2016-03-28 22:39 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Wei Wang, Eric Dumazet, Eric Dumazet, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Fri, Mar 25, 2016 at 5:16 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> On Fri, Mar 25, 2016 at 04:55:27PM -0700, Martin KaFai Lau wrote:
>>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
>>  {
>> +     struct dst_entry *odst;
>> +
>> +     odst = sk_dst_get(sk);
>> +
>>       ip6_update_pmtu(skb, sock_net(sk), mtu,
>>                       sk->sk_bound_dev_if, sk->sk_mark);
>> +
>> +     if (odst && !odst->error &&
>> +         !ip6_dst_check(odst, inet6_sk(sk)->dst_cookie)) {
>> +             struct dst_entry *ndst;
>> +             struct flowi6 fl6;
>> +
>> +             build_skb_flow_key(&fl6, skb, sock_net(sk),
>> +                                sk->sk_bound_dev_if, sk->sk_mark);
>> +             ndst = ip6_route_output(sock_net(sk), NULL, &fl6);
>> +             if (!ndst->error)
>> +                     ip6_dst_store(sk, ndst, NULL, NULL);
> oops...missed:
>                 else
>                         dst_release(ndst);


Can you send an updated patch for review? Since you have a test case
while I didn't.

Also,

1) I think you need to check obsolete before update? There is no
reason to update an obsoleted dst?

2) Not sure if we need to update dst or dst->path here, I guess the later
counts in xfrm case therefore more correct.

Thanks.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-28 22:39                                       ` Cong Wang
@ 2016-03-28 23:44                                         ` Martin KaFai Lau
  0 siblings, 0 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-28 23:44 UTC (permalink / raw)
  To: Cong Wang
  Cc: Wei Wang, Eric Dumazet, Eric Dumazet, Wei Wang, David Miller,
	Linux Kernel Network Developers

On Mon, Mar 28, 2016 at 03:39:42PM -0700, Cong Wang wrote:
> On Fri, Mar 25, 2016 at 5:16 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> > On Fri, Mar 25, 2016 at 04:55:27PM -0700, Martin KaFai Lau wrote:
> >>  void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
> >>  {
> >> +     struct dst_entry *odst;
> >> +
> >> +     odst = sk_dst_get(sk);
> >> +
> >>       ip6_update_pmtu(skb, sock_net(sk), mtu,
> >>                       sk->sk_bound_dev_if, sk->sk_mark);
> >> +
> >> +     if (odst && !odst->error &&
> >> +         !ip6_dst_check(odst, inet6_sk(sk)->dst_cookie)) {
> >> +             struct dst_entry *ndst;
> >> +             struct flowi6 fl6;
> >> +
> >> +             build_skb_flow_key(&fl6, skb, sock_net(sk),
> >> +                                sk->sk_bound_dev_if, sk->sk_mark);
> >> +             ndst = ip6_route_output(sock_net(sk), NULL, &fl6);
> >> +             if (!ndst->error)
> >> +                     ip6_dst_store(sk, ndst, NULL, NULL);
> > oops...missed:
> >                 else
> >                         dst_release(ndst);
>
>
> Can you send an updated patch for review? Since you have a test case
> while I didn't.
>
> Also,
>
> 2) Not sure if we need to update dst or dst->path here, I guess the later
> counts in xfrm case therefore more correct.
Thanks for pointing it out.  Calling ip6_dst_check is also not appropriate here.
odst->ops->check() should be used.  I need to take a closer look and may try
another approach.

Thanks
-- Martin

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-02 19:19 Wei Wang
  2016-03-06  5:55 ` David Miller
@ 2016-03-07 21:28 ` Martin KaFai Lau
  1 sibling, 0 replies; 32+ messages in thread
From: Martin KaFai Lau @ 2016-03-07 21:28 UTC (permalink / raw)
  To: Wei Wang; +Cc: David S . Miller, netdev, Eric Dumazet

On Wed, Mar 02, 2016 at 11:19:21AM -0800, Wei Wang wrote:
> From: Wei Wang <weiwan@google.com>
>
> When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
> the new mtu value is not properly updated in the dst_entry associated
> with the socket.
> This leads to the issue that the mtu value returned by getsockopt(sockfd,
> IPPROTO_IPV6, IPV6_MTU, ...) is wrong.
> The fix is to call the corresponding pmtu related function for connected
> socket so that the dst_entry associated with the socket will get updated
> with the new mtu value.
Would it be a better fix if ip6_sk_update_pmtu() does a dst_check() and
updates sk->sk_dst_cache (if needed) before it returns?  It seems
ipv4_sk_update_pmtu() is also doing it.

or

sk->sk_dst_cache is still valid after ip6_sk_update_pmtu() returns?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-07 17:19   ` Eric Dumazet
@ 2016-03-07 17:35     ` Eric Dumazet
  0 siblings, 0 replies; 32+ messages in thread
From: Eric Dumazet @ 2016-03-07 17:35 UTC (permalink / raw)
  To: David Miller; +Cc: Wei Wang, netdev, Martin KaFai Lau

On Mon, Mar 7, 2016 at 9:19 AM, Eric Dumazet <edumazet@google.com> wrote:
> On Sat, Mar 5, 2016 at 9:55 PM, David Miller <davem@davemloft.net> wrote:
>> From: Wei Wang <weiwan@google.com>
>> Date: Wed,  2 Mar 2016 11:19:21 -0800
>>
>>> @@ -566,7 +567,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
>>>       if (type == ICMPV6_PKT_TOOBIG) {
>>>               if (!ip6_sk_accept_pmtu(sk))
>>>                       goto out;
>>> -             ip6_sk_update_pmtu(skb, sk, info);
>>> +             bh_lock_sock(sk);
>>> +             if (sk->sk_state == TCP_ESTABLISHED &&
>>> +                 !sock_owned_by_user(sk) &&
>>> +                 ipv6_addr_equal(saddr, &sk->sk_v6_rcv_saddr) &&
>>> +                 ipv6_addr_equal(daddr, &sk->sk_v6_daddr) &&
>>> +                 uh->dest == sk->sk_dport)
>>> +                     inet6_csk_update_pmtu(sk, ntohl(info));
>>
>> If I apply this patch it will hide a bug.
>>
>> Why isn't ip6_sk_update_pmtu() matching the same route as the
>> one attached to the socket?
>>
>> I'd prefer you figure out what part of the lookup key used is
>> wrong, and fix that instead.
>>
>
> The dst itself is the same than the socket sk_dst_cache, but
> __ip6_rt_update_pmtu() sees rt6_cache_allowed_for_pmtu()
>
> We endup doing :
>
>                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
>                 if (nrt6) {
>                         rt6_do_update_pmtu(nrt6, mtu);
>
>                         /* ip6_ins_rt(nrt6) will bump the
>                          * rt6->rt6i_node->fn_sernum
>                          * which will fail the next rt6_check() and
>                          * invalidate the sk->sk_dst_cache.
>                          */
>                         ip6_ins_rt(nrt6);
>                 }
>
>
> But apparently the sk->sk_dst_cache is _not_ invalidated, even if the
> comment loudly claims so.


Wei and Martin, what do you think of :


diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ed446639219c..17e5db80be62 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1342,7 +1342,7 @@ static void rt6_do_update_pmtu(struct rt6_info
*rt, u32 mtu)

 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
-       return !(rt->rt6i_flags & RTF_CACHE) &&
+       return (rt->rt6i_flags & RTF_CACHE) &&
                (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
 }

@@ -1359,7 +1359,7 @@ static void __ip6_rt_update_pmtu(struct
dst_entry *dst, const struct sock *sk,
        if (mtu >= dst_mtu(dst))
                return;

-       if (!rt6_cache_allowed_for_pmtu(rt6)) {
+       if (rt6_cache_allowed_for_pmtu(rt6)) {
                rt6_do_update_pmtu(rt6, mtu);
        } else {
                const struct in6_addr *daddr, *saddr;

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-06  5:55 ` David Miller
@ 2016-03-07 17:19   ` Eric Dumazet
  2016-03-07 17:35     ` Eric Dumazet
  0 siblings, 1 reply; 32+ messages in thread
From: Eric Dumazet @ 2016-03-07 17:19 UTC (permalink / raw)
  To: David Miller; +Cc: Wei Wang, netdev

On Sat, Mar 5, 2016 at 9:55 PM, David Miller <davem@davemloft.net> wrote:
> From: Wei Wang <weiwan@google.com>
> Date: Wed,  2 Mar 2016 11:19:21 -0800
>
>> @@ -566,7 +567,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
>>       if (type == ICMPV6_PKT_TOOBIG) {
>>               if (!ip6_sk_accept_pmtu(sk))
>>                       goto out;
>> -             ip6_sk_update_pmtu(skb, sk, info);
>> +             bh_lock_sock(sk);
>> +             if (sk->sk_state == TCP_ESTABLISHED &&
>> +                 !sock_owned_by_user(sk) &&
>> +                 ipv6_addr_equal(saddr, &sk->sk_v6_rcv_saddr) &&
>> +                 ipv6_addr_equal(daddr, &sk->sk_v6_daddr) &&
>> +                 uh->dest == sk->sk_dport)
>> +                     inet6_csk_update_pmtu(sk, ntohl(info));
>
> If I apply this patch it will hide a bug.
>
> Why isn't ip6_sk_update_pmtu() matching the same route as the
> one attached to the socket?
>
> I'd prefer you figure out what part of the lookup key used is
> wrong, and fix that instead.
>

The dst itself is the same than the socket sk_dst_cache, but
__ip6_rt_update_pmtu() sees rt6_cache_allowed_for_pmtu()

We endup doing :

                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
                if (nrt6) {
                        rt6_do_update_pmtu(nrt6, mtu);

                        /* ip6_ins_rt(nrt6) will bump the
                         * rt6->rt6i_node->fn_sernum
                         * which will fail the next rt6_check() and
                         * invalidate the sk->sk_dst_cache.
                         */
                        ip6_ins_rt(nrt6);
                }


But apparently the sk->sk_dst_cache is _not_ invalidated, even if the
comment loudly claims so.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] ipv6: Fix the pmtu path for connected UDP socket
  2016-03-02 19:19 Wei Wang
@ 2016-03-06  5:55 ` David Miller
  2016-03-07 17:19   ` Eric Dumazet
  2016-03-07 21:28 ` Martin KaFai Lau
  1 sibling, 1 reply; 32+ messages in thread
From: David Miller @ 2016-03-06  5:55 UTC (permalink / raw)
  To: weiwan; +Cc: netdev, edumazet

From: Wei Wang <weiwan@google.com>
Date: Wed,  2 Mar 2016 11:19:21 -0800

> @@ -566,7 +567,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
>  	if (type == ICMPV6_PKT_TOOBIG) {
>  		if (!ip6_sk_accept_pmtu(sk))
>  			goto out;
> -		ip6_sk_update_pmtu(skb, sk, info);
> +		bh_lock_sock(sk);
> +		if (sk->sk_state == TCP_ESTABLISHED &&
> +		    !sock_owned_by_user(sk) &&
> +		    ipv6_addr_equal(saddr, &sk->sk_v6_rcv_saddr) &&
> +		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr) &&
> +		    uh->dest == sk->sk_dport)
> +			inet6_csk_update_pmtu(sk, ntohl(info));

If I apply this patch it will hide a bug.

Why isn't ip6_sk_update_pmtu() matching the same route as the
one attached to the socket?

I'd prefer you figure out what part of the lookup key used is
wrong, and fix that instead.

Thanks.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH] ipv6: Fix the pmtu path for connected UDP socket
@ 2016-03-02 19:19 Wei Wang
  2016-03-06  5:55 ` David Miller
  2016-03-07 21:28 ` Martin KaFai Lau
  0 siblings, 2 replies; 32+ messages in thread
From: Wei Wang @ 2016-03-02 19:19 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, Eric Dumazet, Wei Wang

From: Wei Wang <weiwan@google.com>

When ICMPV6_PKT_TOOBIG message is received by a connected UDP socket,
the new mtu value is not properly updated in the dst_entry associated
with the socket.
This leads to the issue that the mtu value returned by getsockopt(sockfd,
IPPROTO_IPV6, IPV6_MTU, ...) is wrong.
The fix is to call the corresponding pmtu related function for connected
socket so that the dst_entry associated with the socket will get updated
with the new mtu value.
And before we call the above function to update mtu, we check to make
sure the source IP address, destination IP address, source port and
destination port are matching between the incoming flow and the socket.

Signed-off-by: Wei Wang <weiwan@google.com>
---
 net/ipv6/udp.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0711f8f..5e6ba54 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -49,6 +49,7 @@
 #include <net/inet6_hashtables.h>
 #include <net/busy_poll.h>
 #include <net/sock_reuseport.h>
+#include <net/inet6_connection_sock.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -566,7 +567,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (type == ICMPV6_PKT_TOOBIG) {
 		if (!ip6_sk_accept_pmtu(sk))
 			goto out;
-		ip6_sk_update_pmtu(skb, sk, info);
+		bh_lock_sock(sk);
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    !sock_owned_by_user(sk) &&
+		    ipv6_addr_equal(saddr, &sk->sk_v6_rcv_saddr) &&
+		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr) &&
+		    uh->dest == sk->sk_dport)
+			inet6_csk_update_pmtu(sk, ntohl(info));
+		else
+			ip6_sk_update_pmtu(skb, sk, info);
+		bh_unlock_sock(sk);
 		if (np->pmtudisc != IPV6_PMTUDISC_DONT)
 			harderr = 1;
 	}
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-03-28 23:44 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-14 20:59 [PATCH] ipv6: Fix the pmtu path for connected UDP socket Wei Wang
2016-03-16 23:53 ` David Miller
2016-03-17  0:22   ` Eric Dumazet
2016-03-17  2:38     ` David Miller
2016-03-17  3:34       ` Eric Dumazet
2016-03-18 17:19         ` Cong Wang
     [not found]           ` <CAEA6p_CfDYYLf-s0pUy7VNFH+46PDZSXNzUExXRAM_xv2w8tcw@mail.gmail.com>
2016-03-18 18:10             ` Cong Wang
2016-03-18 21:26               ` Wei Wang
2016-03-18 22:09                 ` Cong Wang
2016-03-21 17:13                   ` Wei Wang
2016-03-22  6:02                     ` Martin KaFai Lau
2016-03-22 16:53                       ` Cong Wang
2016-03-22 17:39                         ` Martin KaFai Lau
2016-03-22 18:03                           ` Wei Wang
2016-03-22 18:16                             ` Martin KaFai Lau
2016-03-22 20:13                             ` Cong Wang
2016-03-22 23:36                               ` Eric Dumazet
2016-03-23 23:57                                 ` Wei Wang
2016-03-24 18:32                                   ` Martin KaFai Lau
2016-03-24 19:45                                     ` Martin KaFai Lau
2016-03-25 23:55                                   ` Martin KaFai Lau
2016-03-26  0:16                                     ` Martin KaFai Lau
2016-03-28 22:39                                       ` Cong Wang
2016-03-28 23:44                                         ` Martin KaFai Lau
2016-03-24 19:23                                 ` Cong Wang
2016-03-22 18:19                           ` Cong Wang
2016-03-18  7:17 ` Martin KaFai Lau
  -- strict thread matches above, loose matches on Subject: below --
2016-03-02 19:19 Wei Wang
2016-03-06  5:55 ` David Miller
2016-03-07 17:19   ` Eric Dumazet
2016-03-07 17:35     ` Eric Dumazet
2016-03-07 21:28 ` Martin KaFai Lau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.