All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Duyck <aduyck@mirantis.com>
To: herbert@gondor.apana.org.au, tom@herbertland.com,
	jesse@kernel.org, alexander.duyck@gmail.com, edumazet@google.com,
	netdev@vger.kernel.org, davem@davemloft.net
Subject: [RFC PATCH 04/11] GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values
Date: Thu, 07 Apr 2016 18:32:18 -0400	[thread overview]
Message-ID: <20160407223218.11142.26592.stgit@ahduyck-xeon-server> (raw)
In-Reply-To: <20160407222211.11142.41024.stgit@ahduyck-xeon-server>

This patch does two things.

First, it allows GRO to aggregate TCP frames with a fixed IPv4 ID field.  As
a result, we should now be able to aggregate flows that were converted from
IPv6 to IPv4.  In addition, this gives us more flexibility for future
implementations of segmentation, as we may be able to use a fixed IP ID when
segmenting the flow.

Second, it places limitations on the IPv4 ID field of the outer header in
the case of tunneled frames.  Specifically, it forces the outer IP ID to
increment by 1 unless the DF bit is set in the outer IPv4 header.  This way
we avoid creating overlapping series of IP IDs on frames that could
possibly be fragmented if the frame goes through GRO and is then
resegmented via GSO.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---
 include/linux/netdevice.h |    5 ++++-
 net/core/dev.c            |    1 +
 net/ipv4/af_inet.c        |   35 ++++++++++++++++++++++++++++-------
 net/ipv4/tcp_offload.c    |   16 +++++++++++++++-
 net/ipv6/ip6_offload.c    |    8 ++++++--
 5 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 38ccc01eb97d..abf8cc2d9bfb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2123,7 +2123,10 @@ struct napi_gro_cb {
 	/* Used in GRE, set in fou/gue_gro_receive */
 	u8	is_fou:1;
 
-	/* 6 bit hole */
+	/* Used to determine if flush_id can be ignored */
+	u8	is_atomic:1;
+
+	/* 5 bit hole */
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
diff --git a/net/core/dev.c b/net/core/dev.c
index d51343a821ed..4ed2852b3706 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4440,6 +4440,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->encap_mark = 0;
 		NAPI_GRO_CB(skb)->is_fou = 0;
+		NAPI_GRO_CB(skb)->is_atomic = 1;
 		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
 		/* Setup for GRO checksum validation */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 19e9a2c45d71..98fe04b99e01 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1328,6 +1328,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 	for (p = *head; p; p = p->next) {
 		struct iphdr *iph2;
+		u16 flush_id;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
@@ -1351,16 +1352,36 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 			(iph->tos ^ iph2->tos) |
 			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
 
-		/* Save the IP ID check to be included later when we get to
-		 * the transport layer so only the inner most IP ID is checked.
-		 * This is because some GSO/TSO implementations do not
-		 * correctly increment the IP ID for the outer hdrs.
-		 */
-		NAPI_GRO_CB(p)->flush_id =
-			    ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
 		NAPI_GRO_CB(p)->flush |= flush;
+
+		/* We need to store the IP ID check to be included later
+		 * when we can verify that this packet does in fact belong
+		 * to a given flow.
+		 */
+		flush_id = (u16)(id - ntohs(iph2->id));
+
+		/* This bit of code makes it much easier for us to identify
+		 * the cases where we are doing atomic vs non-atomic IP ID
+		 * checks.  Specifically an atomic check can return IP ID
+		 * values 0 - 0xFFFF, while a non-atomic check can only
+		 * return 0 or 0xFFFF.
+		 */
+		if (!NAPI_GRO_CB(p)->is_atomic ||
+		    !(iph->frag_off & htons(IP_DF))) {
+			flush_id ^= NAPI_GRO_CB(p)->count;
+			flush_id = flush_id ? 0xFFFF : 0;
+		}
+
+		/* If the previous IP ID value was based on an atomic
+		 * datagram we can overwrite the value and ignore it.
+		 */
+		if (NAPI_GRO_CB(skb)->is_atomic)
+			NAPI_GRO_CB(p)->flush_id = flush_id;
+		else
+			NAPI_GRO_CB(p)->flush_id |= flush_id;
 	}
 
+	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
 	NAPI_GRO_CB(skb)->flush |= flush;
 	skb_set_network_header(skb, off);
 	/* The above will be needed by the transport layer if there is one
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 08dd25d835af..d1ffd55289bd 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -239,7 +239,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 found:
 	/* Include the IP ID check below from the inner most IP hdr */
-	flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id;
+	flush = NAPI_GRO_CB(p)->flush;
 	flush |= (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -248,6 +248,17 @@ found:
 		flush |= *(u32 *)((u8 *)th + i) ^
 			 *(u32 *)((u8 *)th2 + i);
 
+	/* When we receive our second frame we can make a decision on whether
+	 * we continue this flow as an atomic flow with a fixed ID or use an
+	 * incrementing ID.
+	 */
+	if (NAPI_GRO_CB(p)->flush_id != 1 ||
+	    NAPI_GRO_CB(p)->count != 1 ||
+	    !NAPI_GRO_CB(p)->is_atomic)
+		flush |= NAPI_GRO_CB(p)->flush_id;
+	else
+		NAPI_GRO_CB(p)->is_atomic = false;
+
 	mss = skb_shinfo(p)->gso_size;
 
 	flush |= (len - 1) >= mss;
@@ -316,6 +327,9 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 				  iph->daddr, 0);
 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
 
+	if (NAPI_GRO_CB(skb)->is_atomic)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
+
 	return tcp_gro_complete(skb);
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d7530b9a1d63..e9479499f58c 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -240,10 +240,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 		NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
 		NAPI_GRO_CB(p)->flush |= flush;
 
-		/* Clear flush_id, there's really no concept of ID in IPv6. */
-		NAPI_GRO_CB(p)->flush_id = 0;
+		/* If the previous IP ID value was based on an atomic
+		 * datagram we can overwrite the value and ignore it.
+		 */
+		if (NAPI_GRO_CB(skb)->is_atomic)
+			NAPI_GRO_CB(p)->flush_id = 0;
 	}
 
+	NAPI_GRO_CB(skb)->is_atomic = true;
 	NAPI_GRO_CB(skb)->flush |= flush;
 
 	skb_gro_postpull_rcsum(skb, iph, nlen);

  parent reply	other threads:[~2016-04-07 22:32 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-07 22:31 [RFC PATCH 00/11] GSO partial and TSO FIXEDID support Alexander Duyck
2016-04-07 22:31 ` [RFC PATCH 01/11] GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 02/11] ethtool: Add support for toggling any of the GSO offloads Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 03/11] GSO: Add GSO type for fixed IPv4 ID Alexander Duyck
2016-04-07 22:32 ` Alexander Duyck [this message]
2016-04-07 22:32 ` [RFC PATCH 05/11] GSO: Support partial segmentation offload Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 06/11] VXLAN: Add option to mangle IP IDs on inner headers when using TSO Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 07/11] GENEVE: " Alexander Duyck
2016-04-07 23:22   ` Jesse Gross
2016-04-07 23:52     ` Alexander Duyck
2016-04-08 21:40       ` Jesse Gross
2016-04-08 22:04         ` Alexander Duyck
2016-04-09 15:52           ` Jesse Gross
2016-04-09 17:36             ` Alexander Duyck
2016-04-09 18:02               ` Eric Dumazet
2016-04-09 18:32                 ` Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 08/11] Documentation: Add documentation for TSO and GSO features Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 09/11] i40e/i40evf: Add support for GSO partial with UDP_TUNNEL_CSUM and GRE_CSUM Alexander Duyck
2016-04-07 22:32 ` [RFC PATCH 10/11] ixgbe/ixgbevf: Add support for GSO partial Alexander Duyck
2016-04-07 22:33 ` [RFC PATCH 11/11] igb/igbvf: " Alexander Duyck
  -- strict thread matches above, loose matches on Subject: below --
2016-04-26 16:53 [PATCH v2 0/5] mmc: tmio: make CTL_STATUS handling consistent Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 1/5] mmc: tmio: give read32/write32 functions more descriptive names Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 2/5] mmc: tmio: use BIT() within defines Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 3/5] mmc: tmio: use CTL_STATUS consistently Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 4/5] mmc: tmio/sdhi: distinguish between SCLKDIVEN and ILL_FUNC Wolfram Sang
2016-04-26 16:53 ` [PATCH v2 5/5] mmc: tmio: document CTL_STATUS handling Wolfram Sang
2016-04-27  8:31 ` [PATCH v2 0/5] mmc: tmio: make CTL_STATUS handling consistent Ulf Hansson
2016-04-18 19:01 [PATCH net-next v2 0/2] BPF updates Daniel Borkmann
2016-04-18 19:01 ` [PATCH net-next v2 1/2] bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output Daniel Borkmann
2016-04-18 19:01 ` [PATCH net-next v2 2/2] bpf: add event output helper for notifications/sampling/logging Daniel Borkmann
2016-04-20  0:26 ` [PATCH net-next v2 0/2] BPF updates David Miller
2016-04-08 15:45 [patch net-next 0/5] mlxsw: small driver update Jiri Pirko
2016-04-08 15:45 ` [patch net-next 1/5] mlxsw: Move devlink port registration into common core code Jiri Pirko
2016-04-08 15:45 ` [patch net-next 2/5] mlxsw: Pass mlxsw_core as a param of mlxsw_core_skb_transmit* Jiri Pirko
2016-04-08 15:45 ` [patch net-next 3/5] mlxsw: Do not pass around driver_priv directly Jiri Pirko
2016-04-08 15:45 ` [patch net-next 4/5] mlxsw: reg: Share direction enum between SBPR, SBCM, SBPM Jiri Pirko
2016-04-08 15:45 ` [patch net-next 5/5] mlxsw: reg: Fix SBPM register name Jiri Pirko
2016-04-08 15:51 ` [patch net-next 0/5] mlxsw: small driver update Jiri Pirko
2016-04-08 17:07   ` David Miller
2016-04-08 17:11     ` Jiri Pirko
2016-04-07 18:39 [PATCH v5 net-next 00/15] MTU/buffer reconfig changes Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 01/15] nfp: correct RX buffer length calculation Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 02/15] nfp: move link state interrupt request/free calls Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 03/15] nfp: break up nfp_net_{alloc|free}_rings Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 04/15] nfp: make *x_ring_init do all the init Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 05/15] nfp: allocate ring SW structs dynamically Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 06/15] nfp: cleanup tx ring flush and rename to reset Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 07/15] nfp: reorganize initial filling of RX rings Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 08/15] nfp: preallocate RX buffers early in .ndo_open Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 09/15] nfp: move filling ring information to FW config Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 10/15] nfp: slice .ndo_open() and .ndo_stop() up Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 11/15] nfp: sync ring state during FW reconfiguration Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 12/15] nfp: propagate list buffer size in struct rx_ring Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 13/15] nfp: convert .ndo_change_mtu() to prepare/commit paradigm Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 14/15] nfp: pass ring count as function parameter Jakub Kicinski
2016-04-07 18:39 ` [PATCH v5 net-next 15/15] nfp: allow ring size reconfiguration at runtime Jakub Kicinski
2016-04-08 19:34 ` [PATCH v5 net-next 00/15] MTU/buffer reconfig changes David Miller
2016-04-08 19:34   ` [PATCH v5 net-next 00/15] MTU/buffer reconfig changes, [PATCH net-next v2 1/2] bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output, [PATCH v2 1/5] mmc: tmio: give read32/write32 functions more descriptive names, Re: [patch net-next 0/5] mlxsw: small driver update, Re: [RFC PATCH 07/11] GENEVE: Add option to mangle IP IDs on inner headers when using TSO, Re: [PATCH/RFC v2] gpio: rcar: Add Runtime PM handling for interrupts, [v3,4/6] arm64: dts: salvator-x: enable PWM David Miller, Daniel Borkmann, Wolfram Sang, David Miller, Jesse Gross, Laurent Pinchart, Ulrich Hecht
2016-03-31 11:39 [v3,6/6] clk: shmobile: r8a7795: add PWM clock Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 6/6] " Ulrich Hecht
2016-03-31 11:39 [v3,2/6] arm64: defconfig : add PWM driver support Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 2/6] " Ulrich Hecht
2016-03-31 11:39 [PATCH v3 0/6] R8A7795/Salvator-X PWM support Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 1/6] pwm: rcar: Use ARCH_RENESAS Ulrich Hecht
2016-04-06  1:18   ` [v3,1/6] " Simon Horman
2016-07-11  9:44   ` [PATCH v3 1/6] " Thierry Reding
2016-03-31 11:39 ` [PATCH v3 3/6] arm64: dts: r8a7795: Add PWM device nodes Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 4/6] arm64: dts: salvator-x: enable PWM Ulrich Hecht
2016-03-31 11:39 ` [PATCH v3 5/6] pwm: add R-Car H3 device tree bindings Ulrich Hecht
2016-04-06  1:23   ` [v3,5/6] " Simon Horman
2016-07-11  9:45   ` [PATCH v3 5/6] " Thierry Reding
2016-02-18 16:06 [PATCH/RFC v2] gpio: rcar: Add Runtime PM handling for interrupts Geert Uytterhoeven
2016-02-19  9:18 ` Linus Walleij
2016-02-19 11:59   ` Marc Zyngier
2016-04-11 16:26     ` Laurent Pinchart
2016-04-11 16:55       ` Marc Zyngier
2016-04-11 17:18         ` Geert Uytterhoeven
2016-04-12  8:06           ` Linus Walleij
2016-02-25  9:07 ` Linus Walleij
2016-02-25  9:37   ` Geert Uytterhoeven
2016-02-25 14:19     ` Linus Walleij
2016-02-25 14:26       ` Geert Uytterhoeven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160407223218.11142.26592.stgit@ahduyck-xeon-server \
    --to=aduyck@mirantis.com \
    --cc=alexander.duyck@gmail.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=jesse@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=tom@herbertland.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.