All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, David Ahern <dsahern@gmail.com>,
	Xin Long <lucien.xin@gmail.com>, Julian Anastasov <ja@ssi.bg>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.16 04/55] ipv4: fix fnhe usage by non-cached routes
Date: Fri, 18 May 2018 10:15:00 +0200	[thread overview]
Message-ID: <20180518081457.609687515@linuxfoundation.org> (raw)
In-Reply-To: <20180518081457.428920292@linuxfoundation.org>

4.16-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Julian Anastasov <ja@ssi.bg>

[ Upstream commit 94720e3aee6884d8c8beb678001629da60ec6366 ]

Allow some non-cached routes to use non-expired fnhe:

1. ip_del_fnhe: moved above and now called by find_exception.
The 4.5+ commit deed49df7390 expires fnhe only when caching
routes. Change that to:

1.1. use fnhe for non-cached local output routes, with the help
from (2)

1.2. allow __mkroute_input to detect expired fnhe (outdated
fnhe_gw, for example) when do_cache is false, eg. when itag!=0
for unicast destinations.

2. __mkroute_output: keep fi to allow local routes with orig_oif != 0
to use fnhe info even when the new route will not be cached into fnhe.
After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib
result for local traffic") it means all local routes will be affected
because they are not cached. This change is used to solve a PMTU
problem with IPVS (and probably Netfilter DNAT) setups that redirect
local clients from target local IP (local route to Virtual IP)
to new remote IP target, eg. IPVS TUN real server. Loopback has
64K MTU and we need to create fnhe on the local route that will
keep the reduced PMTU for the Virtual IP. Without this change
fnhe_pmtu is updated from ICMP but never exposed to non-cached
local routes. This includes routes with flowi4_oif!=0 for 4.6+ and
with flowi4_oif=any for 4.14+).

3. update_or_create_fnhe: make sure fnhe_expires is not 0 for
new entries

Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic")
Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif")
Fixes: deed49df7390 ("route: check and remove route cache when we get route")
Cc: David Ahern <dsahern@gmail.com>
Cc: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/route.c |  118 ++++++++++++++++++++++++-------------------------------
 1 file changed, 53 insertions(+), 65 deletions(-)

--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -710,7 +710,7 @@ static void update_or_create_fnhe(struct
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
 		fnhe->fnhe_mtu_locked = lock;
-		fnhe->fnhe_expires = expires;
+		fnhe->fnhe_expires = max(1UL, expires);
 
 		/* Exception created; mark the cached routes for the nexthop
 		 * stale, so anyone caching it rechecks if this exception
@@ -1298,6 +1298,36 @@ static unsigned int ipv4_mtu(const struc
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash;
+	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+	u32 hval = fnhe_hashfun(daddr);
+
+	spin_lock_bh(&fnhe_lock);
+
+	hash = rcu_dereference_protected(nh->nh_exceptions,
+					 lockdep_is_held(&fnhe_lock));
+	hash += hval;
+
+	fnhe_p = &hash->chain;
+	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+	while (fnhe) {
+		if (fnhe->fnhe_daddr == daddr) {
+			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			fnhe_flush_routes(fnhe);
+			kfree_rcu(fnhe, rcu);
+			break;
+		}
+		fnhe_p = &fnhe->fnhe_next;
+		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+						 lockdep_is_held(&fnhe_lock));
+	}
+
+	spin_unlock_bh(&fnhe_lock);
+}
+
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 {
 	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1311,8 +1341,14 @@ static struct fib_nh_exception *find_exc
 
 	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
-		if (fnhe->fnhe_daddr == daddr)
+		if (fnhe->fnhe_daddr == daddr) {
+			if (fnhe->fnhe_expires &&
+			    time_after(jiffies, fnhe->fnhe_expires)) {
+				ip_del_fnhe(nh, daddr);
+				break;
+			}
 			return fnhe;
+		}
 	}
 	return NULL;
 }
@@ -1638,36 +1674,6 @@ static void ip_handle_martian_source(str
 #endif
 }
 
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
-{
-	struct fnhe_hash_bucket *hash;
-	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
-	u32 hval = fnhe_hashfun(daddr);
-
-	spin_lock_bh(&fnhe_lock);
-
-	hash = rcu_dereference_protected(nh->nh_exceptions,
-					 lockdep_is_held(&fnhe_lock));
-	hash += hval;
-
-	fnhe_p = &hash->chain;
-	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
-	while (fnhe) {
-		if (fnhe->fnhe_daddr == daddr) {
-			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
-				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
-			fnhe_flush_routes(fnhe);
-			kfree_rcu(fnhe, rcu);
-			break;
-		}
-		fnhe_p = &fnhe->fnhe_next;
-		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
-						 lockdep_is_held(&fnhe_lock));
-	}
-
-	spin_unlock_bh(&fnhe_lock);
-}
-
 static void set_lwt_redirect(struct rtable *rth)
 {
 	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
@@ -1734,20 +1740,10 @@ static int __mkroute_input(struct sk_buf
 
 	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
 	if (do_cache) {
-		if (fnhe) {
+		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
-			if (rth && rth->dst.expires &&
-			    time_after(jiffies, rth->dst.expires)) {
-				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
-				fnhe = NULL;
-			} else {
-				goto rt_cache;
-			}
-		}
-
-		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
-
-rt_cache:
+		else
+			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2224,39 +2220,31 @@ static struct rtable *__mkroute_output(c
 		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
 		 * be set to the loopback interface as well.
 		 */
-		fi = NULL;
+		do_cache = false;
 	}
 
 	fnhe = NULL;
 	do_cache &= fi != NULL;
-	if (do_cache) {
+	if (fi) {
 		struct rtable __rcu **prth;
 		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
+		if (!do_cache)
+			goto add;
 		if (fnhe) {
 			prth = &fnhe->fnhe_rth_output;
-			rth = rcu_dereference(*prth);
-			if (rth && rth->dst.expires &&
-			    time_after(jiffies, rth->dst.expires)) {
-				ip_del_fnhe(nh, fl4->daddr);
-				fnhe = NULL;
-			} else {
-				goto rt_cache;
+		} else {
+			if (unlikely(fl4->flowi4_flags &
+				     FLOWI_FLAG_KNOWN_NH &&
+				     !(nh->nh_gw &&
+				       nh->nh_scope == RT_SCOPE_LINK))) {
+				do_cache = false;
+				goto add;
 			}
+			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		}
-
-		if (unlikely(fl4->flowi4_flags &
-			     FLOWI_FLAG_KNOWN_NH &&
-			     !(nh->nh_gw &&
-			       nh->nh_scope == RT_SCOPE_LINK))) {
-			do_cache = false;
-			goto add;
-		}
-		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		rth = rcu_dereference(*prth);
-
-rt_cache:
 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
 			return rth;
 	}

  parent reply	other threads:[~2018-05-18  8:15 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-18  8:14 [PATCH 4.16 00/55] 4.16.10-stable review Greg Kroah-Hartman
2018-05-18  8:14 ` [PATCH 4.16 01/55] 8139too: Use disable_irq_nosync() in rtl8139_poll_controller() Greg Kroah-Hartman
2018-05-18  8:14 ` [PATCH 4.16 02/55] bridge: check iface upper dev when setting master via ioctl Greg Kroah-Hartman
2018-05-18  8:14 ` [PATCH 4.16 03/55] dccp: fix tasklet usage Greg Kroah-Hartman
2018-05-18  8:14   ` Greg Kroah-Hartman
2018-05-18  8:15 ` Greg Kroah-Hartman [this message]
2018-05-18  8:15 ` [PATCH 4.16 05/55] ipv4: fix memory leaks in udp_sendmsg, ping_v4_sendmsg Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 06/55] llc: better deal with too small mtu Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 07/55] net: ethernet: sun: niu set correct packet size in skb Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 08/55] net: ethernet: ti: cpsw: fix packet leaking in dual_mac mode Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 09/55] net/mlx4_en: Fix an error handling path in mlx4_en_init_netdev() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 10/55] net/mlx4_en: Verify coalescing parameters are in range Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 11/55] net/mlx5e: Err if asked to offload TC match on frag being first Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 12/55] net/mlx5: E-Switch, Include VF RDMA stats in vport statistics Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 13/55] net sched actions: fix refcnt leak in skbmod Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 14/55] net_sched: fq: take care of throttled flows before reuse Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 15/55] net/smc: restrict non-blocking connect finish Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 16/55] net: support compat 64-bit time in {s,g}etsockopt Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 17/55] net/tls: Dont recursively call push_record during tls_write_space callbacks Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 18/55] net/tls: Fix connection stall on partial tls record Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 19/55] openvswitch: Dont swap table in nlattr_set() after OVS_ATTR_NESTED is found Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 20/55] qmi_wwan: do not steal interfaces from class drivers Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 21/55] r8169: fix powering up RTL8168h Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 22/55] rds: do not leak kernel memory to user land Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 23/55] sctp: delay the authentication for the duplicated cookie-echo chunk Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 24/55] sctp: fix the issue that the cookie-ack with auth cant get processed Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 25/55] sctp: handle two v4 addrs comparison in sctp_inet6_cmp_addr Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 26/55] sctp: remove sctp_chunk_put from fail_mark err path in sctp_ulpevent_make_rcvmsg Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 27/55] sctp: use the old asoc when making the cookie-ack chunk in dupcook_d Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 28/55] tcp_bbr: fix to zero idle_restart only upon S/ACKed data Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 29/55] tcp: ignore Fast Open on repair mode Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 30/55] tg3: Fix vunmap() BUG_ON() triggered from tg3_free_consistent() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 31/55] bonding: do not allow rlb updates to invalid mac Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 32/55] bonding: send learning packets for vlans on slave Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 33/55] net: sched: fix error path in tcf_proto_create() when modules are not configured Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 34/55] net/mlx5e: TX, Use correct counter in dma_map error flow Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 35/55] net/mlx5: Avoid cleaning flow steering table twice during " Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 36/55] hv_netvsc: set master device Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 37/55] ipv6: fix uninit-value in ip6_multipath_l3_keys() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 38/55] net/mlx5e: Allow offloading ipv4 header re-write for icmp Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 39/55] nsh: fix infinite loop Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 40/55] udp: fix SO_BINDTODEVICE Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 41/55] mlxsw: spectrum_switchdev: Do not remove mrouter port from MDBs ports list Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 42/55] net/mlx5e: DCBNL fix min inline header size for dscp Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 43/55] net: systemport: Correclty disambiguate driver instances Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 44/55] nfp: flower: set tunnel ttl value to net default Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 45/55] sctp: clear the new asocs stream outcnt in sctp_stream_update Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 46/55] tcp: restore autocorking Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 47/55] tipc: fix one byte leak in tipc_sk_set_orig_addr() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 48/55] hv_netvsc: Fix net device attach on older Windows hosts Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 49/55] ipv4: reset fnhe_mtu_locked after cache route flushed Greg Kroah-Hartman
2018-05-18  8:15   ` Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 50/55] mlxsw: core: Fix an error handling path in mlxsw_core_bus_device_register() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 51/55] net/mlx5: Fix mlx5_get_vector_affinity function Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 52/55] net: phy: sfp: fix the BR,min computation Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 53/55] net/smc: keep clcsock reference in smc_tcp_listen_work() Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 54/55] scsi: aacraid: Correct hba_send to include iu_type Greg Kroah-Hartman
2018-05-18  8:15 ` [PATCH 4.16 55/55] proc: do not access cmdline nor environ from file-backed areas Greg Kroah-Hartman
2018-05-18 12:05 ` [PATCH 4.16 00/55] 4.16.10-stable review kernelci.org bot
2018-05-18 13:29 ` Guenter Roeck
2018-05-18 13:56   ` Greg Kroah-Hartman
2018-05-18 19:25 ` Naresh Kamboju
2018-05-19  7:43   ` Greg Kroah-Hartman
2018-05-18 20:45 ` Shuah Khan
2018-05-19  7:42   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180518081457.609687515@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@gmail.com \
    --cc=ja@ssi.bg \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lucien.xin@gmail.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.