All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] ipv4: fix fnhe usage by non-cached routes
@ 2018-05-02  6:41 Julian Anastasov
  2018-05-02 17:04 ` David Miller
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Julian Anastasov @ 2018-05-02  6:41 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Martin KaFai Lau, kernel-team, David Ahern, Xin Long

Allow some non-cached routes to use non-expired fnhe:

1. ip_del_fnhe: moved above and now called by find_exception.
The 4.5+ commit deed49df7390 expires fnhe only when caching
routes. Change that to:

1.1. use fnhe for non-cached local output routes, with the help
from (2)

1.2. allow __mkroute_input to detect expired fnhe (outdated
fnhe_gw, for example) when do_cache is false, eg. when itag!=0
for unicast destinations.

2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
to use fnhe info even when the new route will not be cached into fnhe.
After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib
result for local traffic") it means all local routes will be affected
because they are not cached. This change is used to solve a PMTU
problem with IPVS (and probably Netfilter DNAT) setups that redirect
local clients from target local IP (local route to Virtual IP)
to new remote IP target, eg. IPVS TUN real server. Loopback has
64K MTU and we need to create fnhe on the local route that will
keep the reduced PMTU for the Virtual IP. Without this change
fnhe_pmtu is updated from ICMP but never exposed to non-cached
local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
with flowi4_oif=any for 4.14+).

3. update_or_create_fnhe: make sure fnhe_expires is not 0 for
new entries

Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic")
Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif")
Fixes: deed49df7390 ("route: check and remove route cache when we get route")
Cc: David Ahern <dsahern@gmail.com>
Cc: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 net/ipv4/route.c | 118 +++++++++++++++++++++++++------------------------------
 1 file changed, 53 insertions(+), 65 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ccb25d8..1412a7b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
 		fnhe->fnhe_mtu_locked = lock;
-		fnhe->fnhe_expires = expires;
+		fnhe->fnhe_expires = max(1UL, expires);
 
 		/* Exception created; mark the cached routes for the nexthop
 		 * stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash;
+	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+	u32 hval = fnhe_hashfun(daddr);
+
+	spin_lock_bh(&fnhe_lock);
+
+	hash = rcu_dereference_protected(nh->nh_exceptions,
+					 lockdep_is_held(&fnhe_lock));
+	hash += hval;
+
+	fnhe_p = &hash->chain;
+	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+	while (fnhe) {
+		if (fnhe->fnhe_daddr == daddr) {
+			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			fnhe_flush_routes(fnhe);
+			kfree_rcu(fnhe, rcu);
+			break;
+		}
+		fnhe_p = &fnhe->fnhe_next;
+		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+						 lockdep_is_held(&fnhe_lock));
+	}
+
+	spin_unlock_bh(&fnhe_lock);
+}
+
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 {
 	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 
 	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
-		if (fnhe->fnhe_daddr == daddr)
+		if (fnhe->fnhe_daddr == daddr) {
+			if (fnhe->fnhe_expires &&
+			    time_after(jiffies, fnhe->fnhe_expires)) {
+				ip_del_fnhe(nh, daddr);
+				break;
+			}
 			return fnhe;
+		}
 	}
 	return NULL;
 }
@@ -1636,36 +1672,6 @@ static void ip_handle_martian_source(struct net_device *dev,
 #endif
 }
 
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
-{
-	struct fnhe_hash_bucket *hash;
-	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
-	u32 hval = fnhe_hashfun(daddr);
-
-	spin_lock_bh(&fnhe_lock);
-
-	hash = rcu_dereference_protected(nh->nh_exceptions,
-					 lockdep_is_held(&fnhe_lock));
-	hash += hval;
-
-	fnhe_p = &hash->chain;
-	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
-	while (fnhe) {
-		if (fnhe->fnhe_daddr == daddr) {
-			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
-				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
-			fnhe_flush_routes(fnhe);
-			kfree_rcu(fnhe, rcu);
-			break;
-		}
-		fnhe_p = &fnhe->fnhe_next;
-		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
-						 lockdep_is_held(&fnhe_lock));
-	}
-
-	spin_unlock_bh(&fnhe_lock);
-}
-
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   const struct fib_result *res,
@@ -1719,20 +1725,10 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
 	if (do_cache) {
-		if (fnhe) {
+		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
-			if (rth && rth->dst.expires &&
-			    time_after(jiffies, rth->dst.expires)) {
-				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
-				fnhe = NULL;
-			} else {
-				goto rt_cache;
-			}
-		}
-
-		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-
-rt_cache:
+		else
+			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2216,39 +2212,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
 		 * be set to the loopback interface as well.
 		 */
-		fi = NULL;
+		do_cache = false;
 	}
 
 	fnhe = NULL;
 	do_cache &= fi != NULL;
-	if (do_cache) {
+	if (fi) {
 		struct rtable __rcu **prth;
 		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
+		if (!do_cache)
+			goto add;
 		if (fnhe) {
 			prth = &fnhe->fnhe_rth_output;
-			rth = rcu_dereference(*prth);
-			if (rth && rth->dst.expires &&
-			    time_after(jiffies, rth->dst.expires)) {
-				ip_del_fnhe(nh, fl4->daddr);
-				fnhe = NULL;
-			} else {
-				goto rt_cache;
+		} else {
+			if (unlikely(fl4->flowi4_flags &
+				     FLOWI_FLAG_KNOWN_NH &&
+				     !(nh->nh_gw &&
+				       nh->nh_scope == RT_SCOPE_LINK))) {
+				do_cache = false;
+				goto add;
 			}
+			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		}
-
-		if (unlikely(fl4->flowi4_flags &
-			     FLOWI_FLAG_KNOWN_NH &&
-			     !(nh->nh_gw &&
-			       nh->nh_scope == RT_SCOPE_LINK))) {
-			do_cache = false;
-			goto add;
-		}
-		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		rth = rcu_dereference(*prth);
-
-rt_cache:
 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
 			return rth;
 	}
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] ipv4: fix fnhe usage by non-cached routes
  2018-05-02  6:41 [PATCH net] ipv4: fix fnhe usage by non-cached routes Julian Anastasov
@ 2018-05-02 17:04 ` David Miller
  2018-05-02 22:49 ` David Ahern
  2018-05-03  2:55 ` David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2018-05-02 17:04 UTC (permalink / raw)
  To: ja; +Cc: netdev, kafai, kernel-team, dsahern, lucien.xin

From: Julian Anastasov <ja@ssi.bg>
Date: Wed,  2 May 2018 09:41:19 +0300

> Allow some non-cached routes to use non-expired fnhe:
 ...
> Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic")
> Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif")
> Fixes: deed49df7390 ("route: check and remove route cache when we get route")
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Xin Long <lucien.xin@gmail.com>
> Signed-off-by: Julian Anastasov <ja@ssi.bg>

David A., please review.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] ipv4: fix fnhe usage by non-cached routes
  2018-05-02  6:41 [PATCH net] ipv4: fix fnhe usage by non-cached routes Julian Anastasov
  2018-05-02 17:04 ` David Miller
@ 2018-05-02 22:49 ` David Ahern
  2018-05-03  5:32   ` Julian Anastasov
  2018-05-03  2:55 ` David Miller
  2 siblings, 1 reply; 5+ messages in thread
From: David Ahern @ 2018-05-02 22:49 UTC (permalink / raw)
  To: Julian Anastasov, David Miller
  Cc: netdev, Martin KaFai Lau, kernel-team, Xin Long

On 5/2/18 12:41 AM, Julian Anastasov wrote:
> Allow some non-cached routes to use non-expired fnhe:
> 
> 1. ip_del_fnhe: moved above and now called by find_exception.
> The 4.5+ commit deed49df7390 expires fnhe only when caching
> routes. Change that to:
> 
> 1.1. use fnhe for non-cached local output routes, with the help
> from (2)
> 
> 1.2. allow __mkroute_input to detect expired fnhe (outdated
> fnhe_gw, for example) when do_cache is false, eg. when itag!=0
> for unicast destinations.
> 
> 2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
> to use fnhe info even when the new route will not be cached into fnhe.
> After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib
> result for local traffic") it means all local routes will be affected
> because they are not cached. This change is used to solve a PMTU
> problem with IPVS (and probably Netfilter DNAT) setups that redirect
> local clients from target local IP (local route to Virtual IP)
> to new remote IP target, eg. IPVS TUN real server. Loopback has
> 64K MTU and we need to create fnhe on the local route that will
> keep the reduced PMTU for the Virtual IP. Without this change
> fnhe_pmtu is updated from ICMP but never exposed to non-cached
> local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
> with flowi4_oif=any for 4.14+).

Can you add a test case to tools/testing/selftests/net/pmtu.sh to cover
this situation?


> @@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
>  
>  	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
>  	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
> -		if (fnhe->fnhe_daddr == daddr)
> +		if (fnhe->fnhe_daddr == daddr) {
> +			if (fnhe->fnhe_expires &&
> +			    time_after(jiffies, fnhe->fnhe_expires)) {
> +				ip_del_fnhe(nh, daddr);

I'm surprised this is done in the fast path vs gc time. (the existing
code does as well; your change is only moving the call to make the input
and output paths the same)


The change looks correct to me and all of my functional tests passed.

Acked-by: David Ahern <dsahern@gmail.com>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] ipv4: fix fnhe usage by non-cached routes
  2018-05-02  6:41 [PATCH net] ipv4: fix fnhe usage by non-cached routes Julian Anastasov
  2018-05-02 17:04 ` David Miller
  2018-05-02 22:49 ` David Ahern
@ 2018-05-03  2:55 ` David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2018-05-03  2:55 UTC (permalink / raw)
  To: ja; +Cc: netdev, kafai, kernel-team, dsahern, lucien.xin

From: Julian Anastasov <ja@ssi.bg>
Date: Wed,  2 May 2018 09:41:19 +0300

> Allow some non-cached routes to use non-expired fnhe:
> 
> 1. ip_del_fnhe: moved above and now called by find_exception.
> The 4.5+ commit deed49df7390 expires fnhe only when caching
> routes. Change that to:
> 
> 1.1. use fnhe for non-cached local output routes, with the help
> from (2)
> 
> 1.2. allow __mkroute_input to detect expired fnhe (outdated
> fnhe_gw, for example) when do_cache is false, eg. when itag!=0
> for unicast destinations.
> 
> 2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
> to use fnhe info even when the new route will not be cached into fnhe.
> After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib
> result for local traffic") it means all local routes will be affected
> because they are not cached. This change is used to solve a PMTU
> problem with IPVS (and probably Netfilter DNAT) setups that redirect
> local clients from target local IP (local route to Virtual IP)
> to new remote IP target, eg. IPVS TUN real server. Loopback has
> 64K MTU and we need to create fnhe on the local route that will
> keep the reduced PMTU for the Virtual IP. Without this change
> fnhe_pmtu is updated from ICMP but never exposed to non-cached
> local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
> with flowi4_oif=any for 4.14+).
> 
> 3. update_or_create_fnhe: make sure fnhe_expires is not 0 for
> new entries
> 
> Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic")
> Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif")
> Fixes: deed49df7390 ("route: check and remove route cache when we get route")
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Xin Long <lucien.xin@gmail.com>
> Signed-off-by: Julian Anastasov <ja@ssi.bg>

Applied and queued up for -stable, thanks Julian.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] ipv4: fix fnhe usage by non-cached routes
  2018-05-02 22:49 ` David Ahern
@ 2018-05-03  5:32   ` Julian Anastasov
  0 siblings, 0 replies; 5+ messages in thread
From: Julian Anastasov @ 2018-05-03  5:32 UTC (permalink / raw)
  To: David Ahern; +Cc: David Miller, netdev, Martin KaFai Lau, kernel-team, Xin Long


	Hello,

On Wed, 2 May 2018, David Ahern wrote:

> On 5/2/18 12:41 AM, Julian Anastasov wrote:
> > Allow some non-cached routes to use non-expired fnhe:
> > 
> > 1. ip_del_fnhe: moved above and now called by find_exception.
> > The 4.5+ commit deed49df7390 expires fnhe only when caching
> > routes. Change that to:
> > 
> > 1.1. use fnhe for non-cached local output routes, with the help
> > from (2)
> > 
> > 1.2. allow __mkroute_input to detect expired fnhe (outdated
> > fnhe_gw, for example) when do_cache is false, eg. when itag!=0
> > for unicast destinations.
> > 
> > 2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
> > to use fnhe info even when the new route will not be cached into fnhe.
> > After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib
> > result for local traffic") it means all local routes will be affected
> > because they are not cached. This change is used to solve a PMTU
> > problem with IPVS (and probably Netfilter DNAT) setups that redirect
> > local clients from target local IP (local route to Virtual IP)
> > to new remote IP target, eg. IPVS TUN real server. Loopback has
> > 64K MTU and we need to create fnhe on the local route that will
> > keep the reduced PMTU for the Virtual IP. Without this change
> > fnhe_pmtu is updated from ICMP but never exposed to non-cached
> > local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
> > with flowi4_oif=any for 4.14+).
> 
> Can you add a test case to tools/testing/selftests/net/pmtu.sh to cover
> this situation?

	Sure, I'll give it a try.

> > @@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
> >  
> >  	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
> >  	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
> > -		if (fnhe->fnhe_daddr == daddr)
> > +		if (fnhe->fnhe_daddr == daddr) {
> > +			if (fnhe->fnhe_expires &&
> > +			    time_after(jiffies, fnhe->fnhe_expires)) {
> > +				ip_del_fnhe(nh, daddr);
> 
> I'm surprised this is done in the fast path vs gc time. (the existing
> code does as well; your change is only moving the call to make the input
> and output paths the same)
> 
> 
> The change looks correct to me and all of my functional tests passed.
> 
> Acked-by: David Ahern <dsahern@gmail.com>

	Thanks for the review!

Regards

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-05-03  5:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-02  6:41 [PATCH net] ipv4: fix fnhe usage by non-cached routes Julian Anastasov
2018-05-02 17:04 ` David Miller
2018-05-02 22:49 ` David Ahern
2018-05-03  5:32   ` Julian Anastasov
2018-05-03  2:55 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.