linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Sabrina Dubroca <sd@queasysnail.net>,
	Stefano Brivio <sbrivio@redhat.com>,
	David Ahern <dsahern@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.9 30/71] net: ipv4: update fnhe_pmtu when first hops MTU changes
Date: Tue, 16 Oct 2018 19:09:27 +0200	[thread overview]
Message-ID: <20181016170540.905851632@linuxfoundation.org> (raw)
In-Reply-To: <20181016170539.315587743@linuxfoundation.org>

4.9-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Sabrina Dubroca <sd@queasysnail.net>

[ Upstream commit af7d6cce53694a88d6a1bb60c9a239a6a5144459 ]

Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop
exceptions"), exceptions get deprecated separately from cached
routes. In particular, administrative changes don't clear PMTU anymore.

As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes
on PMTU of exceptions for MTU-less routes"), the PMTU discovered before
the local MTU change can become stale:
 - if the local MTU is now lower than the PMTU, that PMTU is now
   incorrect
 - if the local MTU was the lowest value in the path, and is increased,
   we might discover a higher PMTU

Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those
cases.

If the exception was locked, the discovered PMTU was smaller than the
minimal accepted PMTU. In that case, if the new local MTU is smaller
than the current PMTU, let PMTU discovery figure out if locking of the
exception is still needed.

To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU
notifier. By the time the notifier is called, dev->mtu has been
changed. This patch adds the old MTU as additional information in the
notifier structure, and a new call_netdevice_notifiers_u32() function.

Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/netdevice.h |    7 ++++++
 include/net/ip_fib.h      |    1 
 net/core/dev.c            |   28 +++++++++++++++++++++++--
 net/ipv4/fib_frontend.c   |   12 +++++++----
 net/ipv4/fib_semantics.c  |   50 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 92 insertions(+), 6 deletions(-)

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2338,6 +2338,13 @@ struct netdev_notifier_info {
 	struct net_device *dev;
 };
 
+struct netdev_notifier_info_ext {
+	struct netdev_notifier_info info; /* must be first */
+	union {
+		u32 mtu;
+	} ext;
+};
+
 struct netdev_notifier_change_info {
 	struct netdev_notifier_info info; /* must be first */
 	unsigned int flags_changed;
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -363,6 +363,7 @@ int ip_fib_check_default(__be32 gw, stru
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
 
 extern u32 fib_multipath_secret __read_mostly;
 
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1664,6 +1664,28 @@ int call_netdevice_notifiers(unsigned lo
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
+/**
+ *	call_netdevice_notifiers_mtu - call all network notifier blocks
+ *	@val: value passed unmodified to notifier function
+ *	@dev: net_device pointer passed unmodified to notifier function
+ *	@arg: additional u32 argument passed to the notifier function
+ *
+ *	Call all network notifier blocks.  Parameters and return value
+ *	are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+					struct net_device *dev, u32 arg)
+{
+	struct netdev_notifier_info_ext info = {
+		.info.dev = dev,
+		.ext.mtu = arg,
+	};
+
+	BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+	return call_netdevice_notifiers_info(val, dev, &info.info);
+}
+
 #ifdef CONFIG_NET_INGRESS
 static struct static_key ingress_needed __read_mostly;
 
@@ -6589,14 +6611,16 @@ int dev_set_mtu(struct net_device *dev,
 	err = __dev_set_mtu(dev, new_mtu);
 
 	if (!err) {
-		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+						   orig_mtu);
 		err = notifier_to_errno(err);
 		if (err) {
 			/* setting mtu back and notifying everyone again,
 			 * so that they have a chance to revert changes.
 			 */
 			__dev_set_mtu(dev, orig_mtu);
-			call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+			call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+						     new_mtu);
 		}
 	}
 	return err;
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1171,7 +1171,8 @@ static int fib_inetaddr_event(struct not
 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct netdev_notifier_changeupper_info *info;
+	struct netdev_notifier_changeupper_info *upper_info = ptr;
+	struct netdev_notifier_info_ext *info_ext = ptr;
 	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
 	unsigned int flags;
@@ -1206,16 +1207,19 @@ static int fib_netdev_event(struct notif
 			fib_sync_up(dev, RTNH_F_LINKDOWN);
 		else
 			fib_sync_down_dev(dev, event, false);
-		/* fall through */
+		rt_cache_flush(net);
+		break;
 	case NETDEV_CHANGEMTU:
+		fib_sync_mtu(dev, info_ext->ext.mtu);
 		rt_cache_flush(net);
 		break;
 	case NETDEV_CHANGEUPPER:
-		info = ptr;
+		upper_info = ptr;
 		/* flush all routes if dev is linked to or unlinked from
 		 * an L3 master device (e.g., VRF)
 		 */
-		if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+		if (upper_info->upper_dev &&
+		    netif_is_l3_master(upper_info->upper_dev))
 			fib_disable_ip(dev, NETDEV_DOWN, true);
 		break;
 	}
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1377,6 +1377,56 @@ int fib_sync_down_addr(struct net_device
 	return ret;
 }
 
+/* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ *   larger MTUs on the path. With that limit raised, we can now
+ *   discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+{
+	struct fnhe_hash_bucket *bucket;
+	int i;
+
+	bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+	if (!bucket)
+		return;
+
+	for (i = 0; i < FNHE_HASH_SIZE; i++) {
+		struct fib_nh_exception *fnhe;
+
+		for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+		     fnhe;
+		     fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+			if (fnhe->fnhe_mtu_locked) {
+				if (new <= fnhe->fnhe_pmtu) {
+					fnhe->fnhe_pmtu = new;
+					fnhe->fnhe_mtu_locked = false;
+				}
+			} else if (new < fnhe->fnhe_pmtu ||
+				   orig == fnhe->fnhe_pmtu) {
+				fnhe->fnhe_pmtu = new;
+			}
+		}
+	}
+}
+
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+	struct hlist_head *head = &fib_info_devhash[hash];
+	struct fib_nh *nh;
+
+	hlist_for_each_entry(nh, head, nh_hash) {
+		if (nh->nh_dev == dev)
+			nh_update_mtu(nh, dev->mtu, orig_mtu);
+	}
+}
+
 /* Event              force Flags           Description
  * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
  * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host



  parent reply	other threads:[~2018-10-16 17:23 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-16 17:08 [PATCH 4.9 00/71] 4.9.134-stable review Greg Kroah-Hartman
2018-10-16 17:08 ` [PATCH 4.9 01/71] ASoC: wm8804: Add ACPI support Greg Kroah-Hartman
2018-10-16 17:08 ` [PATCH 4.9 02/71] ASoC: sigmadsp: safeload should not have lower byte limit Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 03/71] selftests/efivarfs: add required kernel configs Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 04/71] selftests: memory-hotplug: add required configs Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 05/71] mfd: omap-usb-host: Fix dts probe of children Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 06/71] scsi: iscsi: target: Dont use stack buffer for scatterlist Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 07/71] scsi: qla2xxx: Fix an endian bug in fcpcmd_is_corrupted() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 08/71] sound: enable interrupt after dma buffer initialization Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 09/71] stmmac: fix valid numbers of unicast filter entries Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 10/71] net: macb: disable scatter-gather for macb on sama5d3 Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 11/71] ARM: dts: at91: add new compatibility string " Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 12/71] x86/kvm/lapic: always disable MMIO interface in x2APIC mode Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 13/71] drm/amdgpu: Fix SDMA HQD destroy error on gfx_v7 Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 14/71] ext4: Fix error code in ext4_xattr_set_entry() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 15/71] mm/vmstat.c: fix outdated vmstat_text Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 16/71] MIPS: VDSO: Always map near top of user memory Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 17/71] mach64: detect the dot clock divider correctly on sparc Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 18/71] perf script python: Fix export-to-postgresql.py occasional failure Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 19/71] mm: Preserve _PAGE_DEVMAP across mprotect() calls Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 20/71] i2c: i2c-scmi: fix for i2c_smbus_write_block_data Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 21/71] xhci: Dont print a warning when setting link state for disabled ports Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 22/71] bnxt_en: Fix TX timeout during netpoll Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 23/71] bonding: avoid possible dead-lock Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 24/71] ip6_tunnel: be careful when accessing the inner header Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 25/71] ip_tunnel: " Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 26/71] ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 27/71] ipv6: take rcu lock in rawv6_send_hdrinc() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 28/71] net: dsa: bcm_sf2: Call setup during switch resume Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 29/71] net: hns: fix for unmapping problem when SMMU is on Greg Kroah-Hartman
2018-10-16 17:09 ` Greg Kroah-Hartman [this message]
2018-10-16 17:09 ` [PATCH 4.9 31/71] net/ipv6: Display all addresses in output of /proc/net/if_inet6 Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 32/71] netlabel: check for IPV4MASK in addrinfo_get Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 33/71] net/usb: cancel pending work when unbinding smsc75xx Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 34/71] qlcnic: fix Tx descriptor corruption on 82xx devices Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 35/71] qmi_wwan: Added support for Gemaltos Cinterion ALASxx WWAN interface Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 36/71] team: Forbid enslaving team device to itself Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 37/71] net: dsa: bcm_sf2: Fix unbind ordering Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 38/71] net: mvpp2: Extract the correct ethtype from the skb for tx csum offload Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 39/71] net: systemport: Fix wake-up interrupt race during resume Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 40/71] rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 41/71] tcp/dccp: fix lockdep issue when SYN is backlogged Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 42/71] inet: make sure to grab rcu_read_lock before using ireq->ireq_opt Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 43/71] inet: frags: change inet_frags_init_net() return value Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 44/71] inet: frags: add a pointer to struct netns_frags Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 45/71] inet: frags: refactor ipfrag_init() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 46/71] inet: frags: refactor ipv6_frag_init() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 47/71] inet: frags: refactor lowpan_net_frag_init() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 48/71] ipv6: export ip6 fragments sysctl to unprivileged users Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 49/71] rhashtable: add schedule points Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 50/71] inet: frags: use rhashtables for reassembly units Greg Kroah-Hartman
2018-10-26 13:39   ` Stefan Schmidt
2018-11-29 12:54     ` Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 51/71] inet: frags: remove some helpers Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 52/71] inet: frags: get rif of inet_frag_evicting() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 53/71] inet: frags: remove inet_frag_maybe_warn_overflow() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 54/71] inet: frags: break the 2GB limit for frags storage Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 55/71] inet: frags: do not clone skb in ip_expire() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 56/71] ipv6: frags: rewrite ip6_expire_frag_queue() Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 57/71] rhashtable: reorganize struct rhashtable layout Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 58/71] inet: frags: reorganize struct netns_frags Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 59/71] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 60/71] inet: frags: fix ip6frag_low_thresh boundary Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 61/71] ip: discard IPv4 datagrams with overlapping segments Greg Kroah-Hartman
2018-10-16 17:09 ` [PATCH 4.9 62/71] net: speed up skb_rbtree_purge() Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 63/71] net: modify skb_rbtree_purge to return the truesize of all purged skbs Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 64/71] ipv6: defrag: drop non-last frags smaller than min mtu Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 65/71] net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 66/71] net: add rb_to_skb() and other rb tree helpers Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 67/71] ip: use rb trees for IP frag queue Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 68/71] ip: add helpers to process in-order fragments faster Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 69/71] ip: process in-order fragments efficiently Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 70/71] ip: frags: fix crash in ip_do_fragment() Greg Kroah-Hartman
2018-10-16 17:10 ` [PATCH 4.9 71/71] ipv4: frags: precedence bug in ip_expire() Greg Kroah-Hartman
2018-10-17  7:20 ` [PATCH 4.9 00/71] 4.9.134-stable review Amit Pundir
2018-10-17  7:51   ` Greg Kroah-Hartman
2018-10-17 13:19 ` Guenter Roeck
2018-10-17 13:32   ` Greg Kroah-Hartman
2018-10-17 15:11 ` Rafael Tinoco
2018-10-17 18:43 ` Shuah Khan
2018-10-17 19:19 ` Guenter Roeck
2018-10-18  7:12   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181016170540.905851632@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sbrivio@redhat.com \
    --cc=sd@queasysnail.net \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).