linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: mtk81216 <lina.wang@mediatek.com>
To: "David S . Miller" <davem@davemloft.net>,
	Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>,
	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>,
	Jakub Kicinski <kuba@kernel.org>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Matthias Brugger <matthias.bgg@gmail.com>
Cc: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-arm-kernel@lists.infradead.org>,
	<linux-mediatek@lists.infradead.org>,
	mtk81216 <lina.wang@mediatek.com>
Subject: [PATCH] xfrm:fragmented ipv4 tunnel packets in inner interface
Date: Wed, 9 Sep 2020 14:26:13 +0800	[thread overview]
Message-ID: <20200909062613.18604-1-lina.wang@mediatek.com> (raw)

In ESP tunnel mode, if the inner interface is IPv4 and the outer interface is
IPv4, a big packet that travels through the tunnel is fragmented using the
outer interface's MTU. The peer server removes the tunnel ESP header and
reassembles the fragments into one big packet. When that packet is forwarded
to the next endpoint, it is either dropped for exceeding the MTU or answered
with an ICMP (packet-too-big) error.
When the inner interface is IPv4 and the outer interface is IPv6, the flag of
the xfrm state in tunnel mode is af-unspec and things are different. A big
packet going through the tunnel is fragmented using the outer interface's MTU
minus the tunnel header, and the resulting two or more smaller fragments are
then tunnelled and transmitted on the outer interface; that is what
xfrm6_output does. When the peer server receives such packets, it forwards
them to the next hop successfully because their length is valid.

This patch follows xfrm6_output's logic and makes two changes. The first is
choosing a suitable MTU value that takes the inner/outer interface's MTU and
the dst path into account. The second is that, if a packet is too big,
ip_fragment is called first, and the fragmented packets are then tunnelled
and finally transmitted on the outer interface.

Signed-off-by: mtk81216 <lina.wang@mediatek.com>
---
 include/net/ip.h        |  3 +++
 net/ipv4/ip_output.c    | 10 +++-------
 net/ipv4/xfrm4_output.c | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index b09c48d862cc..05f9c6454ff5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -163,6 +163,9 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 		   int (*output)(struct net *, struct sock *, struct sk_buff *));
+int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		unsigned int mtu,
+		int (*output)(struct net *, struct sock *, struct sk_buff *));
 
 struct ip_fraglist_iter {
 	struct sk_buff	*frag;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 61f802d5350c..f99249132a76 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -82,10 +82,6 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-static int
-ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-	    unsigned int mtu,
-	    int (*output)(struct net *, struct sock *, struct sk_buff *));
 
 /* Generate a checksum for an outgoing IP datagram. */
 void ip_send_check(struct iphdr *iph)
@@ -569,9 +565,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
-static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-		       unsigned int mtu,
-		       int (*output)(struct net *, struct sock *, struct sk_buff *))
+int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+		unsigned int mtu,
+		int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct iphdr *iph = ip_hdr(skb);
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb..1488b79186ad 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,8 +14,27 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
+static int __xfrm4_output_finish(struct net *net, struct sock *sk,
+				 struct sk_buff *skb)
+{
+	return xfrm_output(sk, skb);
+}
+
+static inline int ip4_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct inet_sock *np = skb->sk && !dev_recursion_level() ?
+				inet_sk(skb->sk) : NULL;
+	/* np may be NULL; dev MTU only if pmtudisc >= IP_PMTUDISC_PROBE */
+	return (np && np->pmtudisc >= IP_PMTUDISC_PROBE) ?
+		skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+}
+
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	int mtu;
+	bool toobig;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+
 #ifdef CONFIG_NETFILTER
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
@@ -25,6 +44,24 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
+	if (x->props.mode != XFRM_MODE_TUNNEL)
+		goto skip_frag;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		mtu = ip4_skb_dst_mtu(skb);
+	else
+		goto skip_frag;
+
+	toobig = skb->len > mtu && !skb_is_gso(skb);
+	if (!skb->ignore_df && toobig && skb->sk) {
+		xfrm_local_error(skb, mtu);
+		return -EMSGSIZE;
+	}
+
+	if (toobig || dst_allfrag(skb_dst(skb)))
+		return ip_fragment(net, sk, skb, mtu, __xfrm4_output_finish);
+
+skip_frag:
 	return xfrm_output(sk, skb);
 }
 
-- 
2.18.0

             reply	other threads:[~2020-09-09  6:31 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-09  6:26 mtk81216 [this message]
2020-09-15  7:30 ` [PATCH] xfrm:fragmented ipv4 tunnel packets in inner interface Steffen Klassert
     [not found]   ` <1600160722.5295.15.camel@mbjsdccf07>
     [not found]     ` <20200915093230.GS20687@gauss3.secunet.de>
     [not found]       ` <1600172260.2494.2.camel@mbjsdccf07>
     [not found]         ` <20200917074637.GV20687@gauss3.secunet.de>
     [not found]           ` <1600341549.32639.5.camel@mbjsdccf07>
     [not found]             ` <1604547381.23648.14.camel@mbjsdccf07>
2020-11-05  4:41               ` Maciej Żenczykowski
2020-11-05  4:52   ` Lorenzo Colitti
2020-11-09  9:58     ` Steffen Klassert
2020-11-09 19:38       ` Maciej Żenczykowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200909062613.18604-1-lina.wang@mediatek.com \
    --to=lina.wang@mediatek.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=kuba@kernel.org \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mediatek@lists.infradead.org \
    --cc=matthias.bgg@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).