All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Ahern <dsahern@gmail.com>
To: netdev@vger.kernel.org
Cc: idosch@idosch.org, davem@davemloft.net,
	roopa@cumulusnetworks.com, nikolay@cumulusnetworks.com,
	tom@herbertland.com, David Ahern <dsahern@gmail.com>
Subject: [PATCH v2 net-next 07/10] net/ipv6: Add support for path selection using hash of 5-tuple
Date: Thu,  1 Mar 2018 19:29:47 -0800	[thread overview]
Message-ID: <20180302032950.21154-8-dsahern@gmail.com> (raw)
In-Reply-To: <20180302032950.21154-1-dsahern@gmail.com>

Some operators prefer IPv6 path selection to use a standard 5-tuple
hash rather than just an L3 hash with the flow the label. To that end
add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
("net: ipv4: add support for ECMP hash policy choice"). The default
is still L3 which covers source and destination addresses along with
flow label and IPv6 protocol.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 Documentation/networking/ip-sysctl.txt |  7 ++++
 include/net/ip6_route.h                |  4 +-
 include/net/netevent.h                 |  1 +
 include/net/netns/ipv6.h               |  1 +
 net/ipv6/icmp.c                        |  2 +-
 net/ipv6/route.c                       | 68 ++++++++++++++++++++++++++--------
 net/ipv6/sysctl_net_ipv6.c             | 27 ++++++++++++++
 7 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..783675a730e5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
 	FALSE: disabled
 	Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+	Controls which hash policy to use for multipath routes.
+	Default: 0 (Layer 3)
+	Possible values:
+	0 - Layer 3 (source and destination addresses plus flow label)
+	1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9594f9317952..ce2abc0ff102 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 			    const struct in6_addr *saddr, int oif,
 			    const struct sk_buff *skb, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
-		       struct flow_keys *hkeys);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+		       const struct sk_buff *skb, struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
diff --git a/include/net/netevent.h b/include/net/netevent.h
index baee605a94ab..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -27,6 +27,7 @@ enum netevent_notif_type {
 	NETEVENT_REDIRECT,	   /* arg is struct netevent_redirect ptr */
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index e286fda09fcf..5b51110435fc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_gc_elasticity;
 	int ip6_rt_mtu_expires;
 	int ip6_rt_min_advmss;
+	int multipath_hash_policy;
 	int flowlabel_consistency;
 	int auto_flowlabels;
 	int icmpv6_time;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a5d929223820..6f84668be6ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
-	fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
+	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d2b8368663cb..f0ae58424c45 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+					     struct rt6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     const struct sk_buff *skb,
 					     int strict)
@@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
-		fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL);
+		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
 		return match;
@@ -932,7 +933,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 		rt = rt6_device_match(net, rt, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif,
+			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
 						  skb, flags);
 	}
 	if (rt == net->ipv6.ip6_null_entry) {
@@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
 	rt = rt6_select(net, fn, oif, strict);
 	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(rt, fl6, oif, skb, strict);
+		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
@@ -1839,21 +1840,56 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
-		       struct flow_keys *flkeys)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
 	u32 mhash;
 
-	memset(&hash_keys, 0, sizeof(hash_keys));
-	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	if (skb) {
-		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
-	} else {
-		hash_keys.addrs.v6addrs.src = fl6->saddr;
-		hash_keys.addrs.v6addrs.dst = fl6->daddr;
-		hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
-		hash_keys.basic.ip_proto = fl6->flowi6_proto;
+	switch (net->ipv6.sysctl.multipath_hash_policy) {
+	case 0:
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (skb) {
+			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+		} else {
+			hash_keys.addrs.v6addrs.src = fl6->saddr;
+			hash_keys.addrs.v6addrs.dst = fl6->daddr;
+			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+			hash_keys.basic.ip_proto = fl6->flowi6_proto;
+		}
+		break;
+	case 1:
+		if (skb) {
+			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+			struct flow_keys keys;
+
+			/* short-circuit if we already have L4 hash present */
+			if (skb->l4_hash)
+				return skb_get_hash_raw(skb) >> 1;
+
+			memset(&hash_keys, 0, sizeof(hash_keys));
+
+                        if (!flkeys) {
+				skb_flow_dissect_flow_keys(skb, &keys, flag);
+				flkeys = &keys;
+			}
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+			hash_keys.ports.src = flkeys->ports.src;
+			hash_keys.ports.dst = flkeys->ports.dst;
+			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+		} else {
+			memset(&hash_keys, 0, sizeof(hash_keys));
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+			hash_keys.addrs.v6addrs.src = fl6->saddr;
+			hash_keys.addrs.v6addrs.dst = fl6->daddr;
+			hash_keys.ports.src = fl6->fl6_sport;
+			hash_keys.ports.dst = fl6->fl6_dport;
+			hash_keys.basic.ip_proto = fl6->flowi6_proto;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
@@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb)
 		flkeys = &_flkeys;
 
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-		fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
+		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
 	skb_dst_drop(skb);
 	skb_dst_set(skb,
 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 262f791f1b9b..966c42af92f4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
+#include <net/netevent.h>
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
 
+static int zero;
 static int one = 1;
 static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	struct net *net;
+	int ret;
+
+	net = container_of(table->data, struct net,
+			   ipv6.sysctl.multipath_hash_policy);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+	return ret;
+}
 
 static struct ctl_table ipv6_table_template[] = {
 	{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "fib_multipath_hash_policy",
+		.data		= &init_net.ipv6.sysctl.multipath_hash_policy,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_rt6_multipath_hash_policy,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{ }
 };
 
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
 	ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
 	ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+	ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
-- 
2.11.0

  parent reply	other threads:[~2018-03-02  3:30 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-02  3:29 [PATCH v2 net-next 00/10] net/ipv6: Add support for path selection using hash of 5-tuple David Ahern
2018-03-02  3:29 ` [PATCH v2 net-next 01/10] net/ipv4: Pass net to fib_multipath_hash instead of fib_info David Ahern
2018-03-02 13:12   ` Ido Schimmel
2018-03-02 15:28     ` David Ahern
2018-03-02  3:29 ` [PATCH v2 net-next 02/10] net: Align ip_multipath_l3_keys and ip6_multipath_l3_keys David Ahern
2018-03-02 13:22   ` Ido Schimmel
2018-03-02  3:29 ` [PATCH v2 net-next 03/10] net/ipv4: Simplify fib_multipath_hash with optional flow keys David Ahern
2018-03-02 13:26   ` Ido Schimmel
2018-03-02  3:29 ` [PATCH v2 net-next 04/10] net/ipv6: Make rt6_multipath_hash similar to fib_multipath_hash David Ahern
2018-03-02 13:35   ` Ido Schimmel
2018-03-02  3:29 ` [PATCH v2 net-next 05/10] net: Rename NETEVENT_MULTIPATH_HASH_UPDATE David Ahern
2018-03-02 13:36   ` Ido Schimmel
2018-03-02  3:29 ` [PATCH v2 net-next 06/10] net/ipv6: Pass skb to route lookup David Ahern
2018-03-02 13:49   ` Ido Schimmel
2018-03-02  3:29 ` David Ahern [this message]
2018-03-02 13:52   ` [PATCH v2 net-next 07/10] net/ipv6: Add support for path selection using hash of 5-tuple Ido Schimmel
2018-03-02  3:29 ` [PATCH v2 net-next 08/10] mlxsw: spectrum_router: Add support for ipv6 hash policy update David Ahern
2018-03-02 14:02   ` Ido Schimmel
2018-03-02 15:33     ` David Ahern
2018-03-02  3:29 ` [PATCH v2 net-next 09/10] net: Remove unused get_hash_from_flow functions David Ahern
2018-03-02 14:09   ` Ido Schimmel
2018-03-02 15:33     ` David Ahern
2018-03-02  3:29 ` [PATCH v2 net-next 10/10] selftests: forwarding: Add multipath test for L4 hashing David Ahern
2018-03-02 14:10   ` Ido Schimmel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180302032950.21154-8-dsahern@gmail.com \
    --to=dsahern@gmail.com \
    --cc=davem@davemloft.net \
    --cc=idosch@idosch.org \
    --cc=netdev@vger.kernel.org \
    --cc=nikolay@cumulusnetworks.com \
    --cc=roopa@cumulusnetworks.com \
    --cc=tom@herbertland.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.