netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel
@ 2019-06-11  0:31 Stephen Suryaputra
  2019-06-11  7:29 ` Nikolay Aleksandrov
  0 siblings, 1 reply; 3+ messages in thread
From: Stephen Suryaputra @ 2019-06-11  0:31 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Suryaputra

Multipath hash policy value of 0 isn't distributing since the outer IP
dest and src aren't varied even though the inner ones are. Since the flow
is on the inner ones in the case of tunneled traffic, hashing on them is
desired.

This currently only supports IP over GRE and CONFIG_NET_GRE_DEMUX must
be compiled as built-in in the kernel.

Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
---
 Documentation/networking/ip-sysctl.txt |  4 ++
 net/ipv4/route.c                       | 75 ++++++++++++++++++++++----
 net/ipv4/sysctl_net_ipv4.c             |  2 +-
 3 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 5eedc6941ce5..4f1e18713ea4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -80,6 +80,10 @@ fib_multipath_hash_policy - INTEGER
 	Possible values:
 	0 - Layer 3
 	1 - Layer 4
+	2 - Inner Layer 3 for tunneled IP packets. Only IP tunneled by GRE is
+	    supported now. Others are treated as if the control is set to 0,
+	    i.e. the outer L3 is used. GRE support is only valid when the kernel
+	    was compiled with CONFIG_NET_GRE_DEMUX.
 
 fib_sync_mem - UNSIGNED INTEGER
 	Amount of dirty memory from fib entries that can be backlogged before
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 14c7fdacaa72..92c693ee8d4b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,6 +112,7 @@
 #include <net/secure_seq.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
+#include <net/gre.h>
 
 #include "fib_lookup.h"
 
@@ -1782,23 +1783,29 @@ static int __mkroute_input(struct sk_buff *skb,
  * calculated from the inner IP addresses.
  */
 static void ip_multipath_l3_keys(const struct sk_buff *skb,
-				 struct flow_keys *hash_keys)
+				 struct flow_keys *hash_keys,
+				 const struct iphdr *outer_iph,
+				 int offset)
 {
-	const struct iphdr *outer_iph = ip_hdr(skb);
-	const struct iphdr *key_iph = outer_iph;
 	const struct iphdr *inner_iph;
+	const struct iphdr *key_iph;
 	const struct icmphdr *icmph;
 	struct iphdr _inner_iph;
 	struct icmphdr _icmph;
 
+	if (!outer_iph)
+		outer_iph = ip_hdr(skb);
+
+	key_iph = ip_hdr(skb);
+
 	if (likely(outer_iph->protocol != IPPROTO_ICMP))
 		goto out;
 
 	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
 		goto out;
 
-	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
-				   &_icmph);
+	icmph = skb_header_pointer(skb, offset + outer_iph->ihl * 4,
+				   sizeof(_icmph), &_icmph);
 	if (!icmph)
 		goto out;
 
@@ -1820,6 +1827,47 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
+static void ip_multipath_inner_l3_keys(const struct sk_buff *skb,
+				       struct flow_keys *hash_keys)
+{
+	const struct iphdr *outer_iph = ip_hdr(skb);
+	const struct iphdr *inner_iph;
+	struct iphdr _inner_iph;
+	int hdr_len;
+
+	switch (outer_iph->protocol) {
+#ifdef CONFIG_NET_GRE_DEMUX
+	case IPPROTO_GRE:
+		{
+			struct tnl_ptk_info tpi;
+			bool csum_err = false;
+
+			hdr_len = gre_parse_header(skb, &tpi, &csum_err,
+						   htons(ETH_P_IP),
+						   outer_iph->ihl * 4);
+			if (hdr_len > 0 && tpi.proto == htons(ETH_P_IP))
+				break;
+		}
+		/* fallthrough */
+#endif
+	default:
+		/* Hash on outer for unknown tunnels, non IP tunneled, or non-
+		 * tunneled pkts
+		 */
+		ip_multipath_l3_keys(skb, hash_keys, outer_iph, 0);
+		return;
+	}
+	inner_iph = skb_header_pointer(skb,
+				       outer_iph->ihl * 4 + hdr_len,
+				       sizeof(struct iphdr), &_inner_iph);
+	if (inner_iph) {
+		ip_multipath_l3_keys(skb, hash_keys, inner_iph, hdr_len);
+	} else {
+		/* Use outer */
+		ip_multipath_l3_keys(skb, hash_keys, outer_iph, 0);
+	}
+}
+
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
@@ -1828,12 +1876,13 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 	struct flow_keys hash_keys;
 	u32 mhash;
 
+	memset(&hash_keys, 0, sizeof(hash_keys));
+
 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
 	case 0:
-		memset(&hash_keys, 0, sizeof(hash_keys));
 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 		if (skb) {
-			ip_multipath_l3_keys(skb, &hash_keys);
+			ip_multipath_l3_keys(skb, &hash_keys, NULL, 0);
 		} else {
 			hash_keys.addrs.v4addrs.src = fl4->saddr;
 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
@@ -1849,8 +1898,6 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			if (skb->l4_hash)
 				return skb_get_hash_raw(skb) >> 1;
 
-			memset(&hash_keys, 0, sizeof(hash_keys));
-
 			if (!flkeys) {
 				skb_flow_dissect_flow_keys(skb, &keys, flag);
 				flkeys = &keys;
@@ -1863,7 +1910,6 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.ports.dst = flkeys->ports.dst;
 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
 		} else {
-			memset(&hash_keys, 0, sizeof(hash_keys));
 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 			hash_keys.addrs.v4addrs.src = fl4->saddr;
 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
@@ -1872,6 +1918,15 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
 		}
 		break;
+	case 2:
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		if (skb) {
+			ip_multipath_inner_l3_keys(skb, &hash_keys);
+		} else {
+			hash_keys.addrs.v4addrs.src = fl4->saddr;
+			hash_keys.addrs.v4addrs.dst = fl4->daddr;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2316c08e9591..e1efc2e62d21 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -960,7 +960,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= &zero,
-		.extra2		= &one,
+		.extra2		= &two,
 	},
 #endif
 	{
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel
  2019-06-11  0:31 [PATCH net-next] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel Stephen Suryaputra
@ 2019-06-11  7:29 ` Nikolay Aleksandrov
  2019-06-13 15:32   ` Stephen Suryaputra
  0 siblings, 1 reply; 3+ messages in thread
From: Nikolay Aleksandrov @ 2019-06-11  7:29 UTC (permalink / raw)
  To: Stephen Suryaputra, netdev

On 11/06/2019 03:31, Stephen Suryaputra wrote:
> Multipath hash policy value of 0 isn't distributing since the outer IP
> dest and src aren't varied even though the inner ones are. Since the flow
> is on the inner ones in the case of tunneled traffic, hashing on them is
> desired.
> 
> This currently only supports IP over GRE and CONFIG_NET_GRE_DEMUX must
> be compiled as built-in in the kernel.
> 
> Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
> ---
>  Documentation/networking/ip-sysctl.txt |  4 ++
>  net/ipv4/route.c                       | 75 ++++++++++++++++++++++----
>  net/ipv4/sysctl_net_ipv4.c             |  2 +-
>  3 files changed, 70 insertions(+), 11 deletions(-)
> 
[snip]
> @@ -1820,6 +1827,47 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
>  	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
>  }
>  
> +static void ip_multipath_inner_l3_keys(const struct sk_buff *skb,
> +				       struct flow_keys *hash_keys)
> +{
> +	const struct iphdr *outer_iph = ip_hdr(skb);
> +	const struct iphdr *inner_iph;
> +	struct iphdr _inner_iph;
> +	int hdr_len;
> +
> +	switch (outer_iph->protocol) {
> +#ifdef CONFIG_NET_GRE_DEMUX
> +	case IPPROTO_GRE:
> +		{
> +			struct tnl_ptk_info tpi;
> +			bool csum_err = false;
> +
> +			hdr_len = gre_parse_header(skb, &tpi, &csum_err,
> +						   htons(ETH_P_IP),
> +						   outer_iph->ihl * 4);
> +			if (hdr_len > 0 && tpi.proto == htons(ETH_P_IP))
> +				break;

Have you considered using the flow dissector and doing something similar to the bonding ?
It does a full flow dissect via skb_flow_dissect_flow_keys() and uses whatever headers
it needs, but that will support any tunneling protocol which the flow dissector
recognizes and will be improved upon automatically by people adding to it.
Also would avoid doing dissection by yourself.

The bond commit which added that was:
 32819dc18348 ("bonding: modify the old and add new xmit hash policies")

> +		}
> +		/* fallthrough */
> +#endif
> +	default:
> +		/* Hash on outer for unknown tunnels, non IP tunneled, or non-
> +		 * tunneled pkts
> +		 */
> +		ip_multipath_l3_keys(skb, hash_keys, outer_iph, 0);
> +		return;
> +	}
> +	inner_iph = skb_header_pointer(skb,
> +				       outer_iph->ihl * 4 + hdr_len,
> +				       sizeof(struct iphdr), &_inner_iph);
> +	if (inner_iph) {
> +		ip_multipath_l3_keys(skb, hash_keys, inner_iph, hdr_len);
> +	} else {
> +		/* Use outer */
> +		ip_multipath_l3_keys(skb, hash_keys, outer_iph, 0);
> +	}
> +}
> +
>  /* if skb is set it will be used and fl4 can be NULL */
>  int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  		       const struct sk_buff *skb, struct flow_keys *flkeys)
> @@ -1828,12 +1876,13 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  	struct flow_keys hash_keys;
>  	u32 mhash;
>  
> +	memset(&hash_keys, 0, sizeof(hash_keys));
> +

This was an optimization, it was done on purpose to avoid doing anything when we
have L3+4 configured (1) and the skb has its hash already calculated.

>  	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
>  	case 0:
> -		memset(&hash_keys, 0, sizeof(hash_keys));
>  		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>  		if (skb) {
> -			ip_multipath_l3_keys(skb, &hash_keys);
> +			ip_multipath_l3_keys(skb, &hash_keys, NULL, 0);
>  		} else {
>  			hash_keys.addrs.v4addrs.src = fl4->saddr;
>  			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> @@ -1849,8 +1898,6 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  			if (skb->l4_hash)
>  				return skb_get_hash_raw(skb) >> 1;
>  
> -			memset(&hash_keys, 0, sizeof(hash_keys));
> -
>  			if (!flkeys) {
>  				skb_flow_dissect_flow_keys(skb, &keys, flag);
>  				flkeys = &keys;
> @@ -1863,7 +1910,6 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  			hash_keys.ports.dst = flkeys->ports.dst;
>  			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
>  		} else {
> -			memset(&hash_keys, 0, sizeof(hash_keys));
>  			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>  			hash_keys.addrs.v4addrs.src = fl4->saddr;
>  			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> @@ -1872,6 +1918,15 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
>  			hash_keys.basic.ip_proto = fl4->flowi4_proto;
>  		}
>  		break;
> +	case 2:
> +		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> +		if (skb) {
> +			ip_multipath_inner_l3_keys(skb, &hash_keys);
> +		} else {
> +			hash_keys.addrs.v4addrs.src = fl4->saddr;
> +			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> +		}
> +		break;
>  	}
>  	mhash = flow_hash_from_keys(&hash_keys);
>  
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 2316c08e9591..e1efc2e62d21 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -960,7 +960,7 @@ static struct ctl_table ipv4_net_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_fib_multipath_hash_policy,
>  		.extra1		= &zero,
> -		.extra2		= &one,
> +		.extra2		= &two,
>  	},
>  #endif
>  	{
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel
  2019-06-11  7:29 ` Nikolay Aleksandrov
@ 2019-06-13 15:32   ` Stephen Suryaputra
  0 siblings, 0 replies; 3+ messages in thread
From: Stephen Suryaputra @ 2019-06-13 15:32 UTC (permalink / raw)
  To: Nikolay Aleksandrov; +Cc: netdev

On Tue, Jun 11, 2019 at 10:29:56AM +0300, Nikolay Aleksandrov wrote:
> 
> Have you considered using the flow dissector and doing something similar to the bonding ?
> It does a full flow dissect via skb_flow_dissect_flow_keys() and uses whatever headers
> it needs, but that will support any tunneling protocol which the flow dissector
> recognizes and will be improved upon automatically by people adding to it.
> Also would avoid doing dissection by yourself.
> 
> The bond commit which added that was:
>  32819dc18348 ("bonding: modify the old and add new xmit hash policies")
> 

I didn't consider it, but I should have. Thanks for pointing me in that
direction. It's simpler.

> >  /* if skb is set it will be used and fl4 can be NULL */
> >  int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
> >  		       const struct sk_buff *skb, struct flow_keys *flkeys)
> > @@ -1828,12 +1876,13 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
> >  	struct flow_keys hash_keys;
> >  	u32 mhash;
> >  
> > +	memset(&hash_keys, 0, sizeof(hash_keys));
> > +
> 
> This was an optimization, it was done on purpose to avoid doing anything when we
> have L3+4 configured (1) and the skb has its hash already calculated.
> 
Will revert to the original lines.

Thanks.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-06-13 15:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-11  0:31 [PATCH net-next] ipv4: Support multipath hashing on inner IP pkts for GRE tunnel Stephen Suryaputra
2019-06-11  7:29 ` Nikolay Aleksandrov
2019-06-13 15:32   ` Stephen Suryaputra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).