All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, David Wragg <david@weave.works>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.4 21/75] vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices
Date: Wed, 22 Jun 2016 15:40:43 -0700	[thread overview]
Message-ID: <20160622223501.115189345@linuxfoundation.org> (raw)
In-Reply-To: <20160622223500.055133765@linuxfoundation.org>

4.4-stable review patch.  If anyone has any objections, please let me know.

------------------

From: David Wragg <david@weave.works>

[ Upstream commit 7e059158d57b79159eaf1f504825d19866ef2c42 ]

Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could
transmit vxlan packets of any size, constrained only by the ability to
send out the resulting packets.  4.3 introduced netdevs corresponding
to tunnel vports.  These netdevs have an MTU, which limits the size of
a packet that can be successfully encapsulated.  The default MTU
values are low (1500 or less), which is awkwardly small in the context
of physical networks supporting jumbo frames, and leads to a
conspicuous change in behaviour for userspace.

Instead, set the MTU on openvswitch-created netdevs to be the relevant
maximum (i.e. the maximum IP packet size minus any relevant overhead),
effectively restoring the behaviour prior to 4.3.

Signed-off-by: David Wragg <david@weave.works>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/geneve.c          |   18 ++++++++++++++----
 drivers/net/vxlan.c           |   11 ++++++++---
 include/net/ip_tunnels.h      |    1 +
 net/ipv4/ip_gre.c             |    8 ++++++++
 net/ipv4/ip_tunnel.c          |   20 +++++++++++++++++---
 net/openvswitch/vport-vxlan.c |    2 ++
 6 files changed, 50 insertions(+), 10 deletions(-)

--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1356,11 +1356,21 @@ struct net_device *geneve_dev_create_fb(
 
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
 			       0, 0, 0, htons(dst_port), true);
-	if (err) {
-		free_netdev(dev);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err;
+
+	/* openvswitch users expect packet sizes to be unrestricted,
+	 * so set the largest MTU we can.
+	 */
+	err = geneve_change_mtu(dev, IP_MAX_MTU);
+	if (err)
+		goto err;
+
 	return dev;
+
+ err:
+	free_netdev(dev);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
 
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2776,6 +2776,7 @@ static int vxlan_dev_configure(struct ne
 	int err;
 	bool use_ipv6 = false;
 	__be16 default_port = vxlan->cfg.dst_port;
+	struct net_device *lowerdev = NULL;
 
 	vxlan->net = src_net;
 
@@ -2796,9 +2797,7 @@ static int vxlan_dev_configure(struct ne
 	}
 
 	if (conf->remote_ifindex) {
-		struct net_device *lowerdev
-			 = __dev_get_by_index(src_net, conf->remote_ifindex);
-
+		lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
 		dst->remote_ifindex = conf->remote_ifindex;
 
 		if (!lowerdev) {
@@ -2822,6 +2821,12 @@ static int vxlan_dev_configure(struct ne
 		needed_headroom = lowerdev->hard_header_len;
 	}
 
+	if (conf->mtu) {
+		err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
+		if (err)
+			return err;
+	}
+
 	if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
 		needed_headroom += VXLAN6_HEADROOM;
 	else
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb,
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 		    u8 *protocol, struct flowi4 *fl4);
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1247,6 +1247,14 @@ struct net_device *gretap_fb_dev_create(
 	err = ipgre_newlink(net, dev, tb, NULL);
 	if (err < 0)
 		goto out;
+
+	/* openvswitch users expect packet sizes to be unrestricted,
+	 * so set the largest MTU we can.
+	 */
+	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+	if (err)
+		goto out;
+
 	return dev;
 out:
 	free_netdev(dev);
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -948,17 +948,31 @@ done:
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
 
-	if (new_mtu < 68 ||
-	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
+	if (new_mtu < 68)
 		return -EINVAL;
+
+	if (new_mtu > max_mtu) {
+		if (strict)
+			return -EINVAL;
+
+		new_mtu = max_mtu;
+	}
+
 	dev->mtu = new_mtu;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
+
+int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	return __ip_tunnel_change_mtu(dev, new_mtu, true);
+}
 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 
 static void ip_tunnel_dev_free(struct net_device *dev)
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(co
 	struct vxlan_config conf = {
 		.no_share = true,
 		.flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+		/* Don't restrict the packets that can be sent by MTU */
+		.mtu = IP_MAX_MTU,
 	};
 
 	if (!options) {

  parent reply	other threads:[~2016-06-22 23:19 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-22 22:40 [PATCH 4.4 00/75] 4.4.14-stable review Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 01/75] scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 02/75] scsi: Add QEMU CD-ROM to VPD Inquiry Blacklist Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 03/75] tipc: check nl sock before parsing nested attributes Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 04/75] netlink: Fix dump skb leak/double free Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 05/75] tipc: fix nametable publication field in nl compat Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 06/75] switchdev: pass pointer to fib_info instead of copy Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 07/75] tuntap: correctly wake up process during uninit Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 08/75] bpf: Use mount_nodev not mount_ns to mount the bpf filesystem Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 09/75] udp: prevent skbs lingering in tunnel socket queues Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 10/75] uapi glibc compat: fix compilation when !__USE_MISC in glibc Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 11/75] bpf, inode: disallow userns mounts Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 12/75] sfc: on MC reset, clear PIO buffer linkage in TXQs Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 13/75] team: dont call netdev_change_features under team->lock Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 14/75] vxlan: Accept user specified MTU value when create new vxlan link Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 15/75] tcp: record TLP and ER timer stats in v6 stats Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 16/75] bridge: Dont insert unnecessary local fdb entry on changing mac address Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 17/75] l2tp: fix configuration passed to setup_udp_tunnel_sock() Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 18/75] ipv6: Skip XFRM lookup if dst_entry in socket cache is valid Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 19/75] vxlan: Relax MTU constraints Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 20/75] geneve: " Greg Kroah-Hartman
2016-06-22 22:40 ` Greg Kroah-Hartman [this message]
2016-06-22 22:40 ` [PATCH 4.4 24/75] ALSA: hda - Add PCI ID for Kabylake Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 25/75] ALSA: hda - Fix headset mic detection problem for Dell machine Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 26/75] ALSA: hda/realtek - ALC256 speaker noise issue Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 27/75] ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703 Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 28/75] ALSA: hda/realtek: Add T560 docking unit fixup Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 29/75] ARM: fix PTRACE_SETVFPREGS on SMP systems Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 30/75] gpio: bcm-kona: fix bcm_kona_gpio_reset() warnings Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 31/75] s390/bpf: fix recache skb->data/hlen for skb_vlan_push/pop Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 32/75] s390/bpf: reduce maximum program size to 64 KB Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 33/75] irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 34/75] crypto: public_key: select CRYPTO_AKCIPHER Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 35/75] crypto: ccp - Fix AES XTS error for request sizes above 4096 Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 36/75] arm64: Provide "model name" in /proc/cpuinfo for PER_LINUX32 tasks Greg Kroah-Hartman
2016-06-22 22:40 ` [PATCH 4.4 37/75] arm64: mm: always take dirty state from new pte in ptep_set_access_flags Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 38/75] powerpc/pseries/eeh: Handle RTAS delay requests in configure_bridge Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 39/75] powerpc: Fix definition of SIAR and SDAR registers Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 40/75] powerpc: Use privileged SPR number for MMCR2 Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 41/75] powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 42/75] pinctrl: mediatek: fix dual-edge code defect Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 43/75] parisc: Fix pagefault crash in unaligned __get_user() call Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 44/75] memcg: add RCU locking around css_for_each_descendant_pre() in memcg_offline_kmem() Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 45/75] ecryptfs: forbid opening files without mmap handler Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 46/75] wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 47/75] x86/entry/traps: Dont force in_interrupt() to return true in IST handlers Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 48/75] proc: prevent stacking filesystems on top Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 49/75] sched: panic on corrupted stack end Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 50/75] fix d_walk()/non-delayed __d_free() race Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 51/75] sparc: Fix system call tracing register handling Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 52/75] sparc64: Fix bootup regressions on some Kconfig combinations Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 53/75] sparc64: Fix numa node distance initialization Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 54/75] sparc64: Fix sparc64_set_context stack handling Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 55/75] sparc/PCI: Fix for panic while enabling SR-IOV Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 56/75] sparc64: Reduce TLB flushes during hugepte changes Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 57/75] sparc64: Take ctx_alloc_lock properly in hugetlb_setup() Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 58/75] sparc: Harden signal return frame checks Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 59/75] sparc64: Fix return from trap window fill crashes Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 60/75] MIPS: Fix 64k page support for 32 bit kernels Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 61/75] netfilter: x_tables: validate e->target_offset early Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 62/75] netfilter: x_tables: make sure e->next_offset covers remaining blob size Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 63/75] netfilter: x_tables: fix unconditional helper Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 64/75] crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 65/75] drm/core: Do not preserve framebuffer on rmfb, v4 Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 66/75] netfilter: x_tables: dont move to non-existent next rule Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 67/75] netfilter: x_tables: validate targets of jumps Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 68/75] netfilter: x_tables: add and use xt_check_entry_offsets Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 69/75] netfilter: x_tables: kill check_entry helper Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 70/75] netfilter: x_tables: assert minimum target size Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 71/75] netfilter: x_tables: add compat version of xt_check_entry_offsets Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 72/75] netfilter: x_tables: check standard target size too Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 73/75] netfilter: x_tables: check for bogus target offset Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 74/75] netfilter: x_tables: validate all offsets and sizes in a rule Greg Kroah-Hartman
2016-06-22 22:41 ` [PATCH 4.4 75/75] netfilter: x_tables: dont reject valid target size on some architectures Greg Kroah-Hartman
2016-06-23  4:54 ` [PATCH 4.4 00/75] 4.4.14-stable review -rc2 Greg Kroah-Hartman
2016-06-23 16:21   ` Kevin Hilman
2016-06-24 17:14     ` Greg Kroah-Hartman
2016-06-23 19:43   ` Guenter Roeck
2016-06-23 21:53   ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160622223501.115189345@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=david@weave.works \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.