netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ido Schimmel <idosch@idosch.org>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, dsahern@gmail.com,
	petrm@nvidia.com, roopa@nvidia.com, nikolay@nvidia.com,
	ssuryaextr@gmail.com, mlxsw@nvidia.com,
	Ido Schimmel <idosch@nvidia.com>
Subject: [RFC PATCH net-next 03/10] ipv4: Add custom multipath hash policy
Date: Sun,  2 May 2021 19:22:50 +0300	[thread overview]
Message-ID: <20210502162257.3472453-4-idosch@idosch.org> (raw)
In-Reply-To: <20210502162257.3472453-1-idosch@idosch.org>

From: Ido Schimmel <idosch@nvidia.com>

Add a new multipath hash policy where the packet fields used for hash
calculation are determined by user space via the
fib_multipath_hash_fields sysctl that was introduced in the previous
patch.

The current set of available packet fields includes both outer and inner
fields, which requires two invocations of the flow dissector. Avoid
unnecessary dissection of the outer or inner flows by skipping
dissection if the 'need_outer' or 'need_inner' variables are not set.
These fields are set / cleared as part of the control path when the
fib_multipath_hash_fields sysctl is configured.

In accordance with the existing policies, when an skb is not available,
packet fields are extracted from the provided flow key. In which case,
only outer fields are considered.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 Documentation/networking/ip-sysctl.rst |   2 +
 net/ipv4/route.c                       | 121 +++++++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c             |   3 +-
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 8ab61f4edf02..549601494694 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER
 	- 0 - Layer 3
 	- 1 - Layer 4
 	- 2 - Layer 3 or inner Layer 3 if present
+	- 3 - Custom multipath hash. Fields used for multipath hash calculation
+	  are determined by fib_multipath_hash_fields sysctl
 
 fib_multipath_hash_fields - list of comma separated ranges
 	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9d61e969446e..995799a6e06f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1906,6 +1906,121 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
+static u32 fib_multipath_custom_hash_outer(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool *p_has_inner)
+{
+	unsigned long *hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys keys, hash_keys;
+
+	if (!net->ipv4.fib_multipath_hash_fields_need_outer)
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields))
+		hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields))
+		hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields))
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields))
+		hash_keys.ports.src = keys.ports.src;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields))
+		hash_keys.ports.dst = keys.ports.dst;
+
+	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_inner(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool has_inner)
+{
+	unsigned long *hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys keys, hash_keys;
+
+	/* We assume the packet carries an encapsulation, but if none was
+	 * encountered during dissection of the outer flow, then there is no
+	 * point in calling the flow dissector again.
+	 */
+	if (!has_inner)
+		return 0;
+
+	if (!net->ipv4.fib_multipath_hash_fields_need_inner)
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+		return 0;
+
+	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields))
+			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields))
+			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_IP, hash_fields))
+			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_IP, hash_fields))
+			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+		if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_FLOWLABEL, hash_fields))
+			hash_keys.tags.flow_label = keys.tags.flow_label;
+	}
+
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_IP_PROTO, hash_fields))
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_SRC_PORT, hash_fields))
+		hash_keys.ports.src = keys.ports.src;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(INNER_DST_PORT, hash_fields))
+		hash_keys.ports.dst = keys.ports.dst;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_skb(const struct net *net,
+					 const struct sk_buff *skb)
+{
+	u32 mhash, mhash_inner;
+	bool has_inner = true;
+
+	mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner);
+	mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner);
+
+	return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 fib_multipath_custom_hash_fl4(const struct net *net,
+					 const struct flowi4 *fl4)
+{
+	unsigned long *hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys hash_keys;
+
+	if (!net->ipv4.fib_multipath_hash_fields_need_outer)
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_IP, hash_fields))
+		hash_keys.addrs.v4addrs.src = fl4->saddr;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_IP, hash_fields))
+		hash_keys.addrs.v4addrs.dst = fl4->daddr;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(IP_PROTO, hash_fields))
+		hash_keys.basic.ip_proto = fl4->flowi4_proto;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(SRC_PORT, hash_fields))
+		hash_keys.ports.src = fl4->fl4_sport;
+	if (FIB_MULTIPATH_HASH_TEST_FIELD(DST_PORT, hash_fields))
+		hash_keys.ports.dst = fl4->fl4_dport;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
@@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		}
 		mhash = flow_hash_from_keys(&hash_keys);
 		break;
+	case 3:
+		if (skb)
+			mhash = fib_multipath_custom_hash_skb(net, skb);
+		else
+			mhash = fib_multipath_custom_hash_fl4(net, fl4);
+		break;
 	}
 
 	if (multipath_hash)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0db7e68c38cd..988defcd8ae3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@
 #include <net/netevent.h>
 
 static int two = 2;
+static int three __maybe_unused = 3;
 static int four = 4;
 static int thousand = 1000;
 static int tcp_retr1_max = 255;
@@ -1075,7 +1076,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
+		.extra2		= &three,
 	},
 	{
 		.procname	= "fib_multipath_hash_fields",
-- 
2.30.2


  parent reply	other threads:[~2021-05-02 16:24 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-02 16:22 [RFC PATCH net-next 00/10] Add support for custom multipath hash Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 01/10] ipv4: Calculate multipath hash inside switch statement Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 02/10] ipv4: Add a sysctl to control multipath hash fields Ido Schimmel
2021-05-04  2:33   ` David Ahern
2021-05-05  7:48     ` Ido Schimmel
2021-05-02 16:22 ` Ido Schimmel [this message]
2021-05-04  2:38   ` [RFC PATCH net-next 03/10] ipv4: Add custom multipath hash policy David Ahern
2021-05-04  2:40     ` David Ahern
2021-05-05  8:45       ` Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 04/10] ipv6: Use a more suitable label name Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 05/10] ipv6: Calculate multipath hash inside switch statement Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 06/10] ipv6: Add a sysctl to control multipath hash fields Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 07/10] ipv6: Add custom multipath hash policy Ido Schimmel
2021-05-04  2:46   ` David Ahern
2021-05-05  8:50     ` Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 08/10] selftests: forwarding: Add test for custom multipath hash Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 09/10] selftests: forwarding: Add test for custom multipath hash with IPv4 GRE Ido Schimmel
2021-05-02 16:22 ` [RFC PATCH net-next 10/10] selftests: forwarding: Add test for custom multipath hash with IPv6 GRE Ido Schimmel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210502162257.3472453-4-idosch@idosch.org \
    --to=idosch@idosch.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@gmail.com \
    --cc=idosch@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=mlxsw@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=nikolay@nvidia.com \
    --cc=petrm@nvidia.com \
    --cc=roopa@nvidia.com \
    --cc=ssuryaextr@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).