All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match
@ 2018-02-11 22:26 Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-11 22:26 UTC (permalink / raw)
  To: netdev; +Cc: dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This series extends fib rule match support to include sport, dport
and ip proto match (to complete the 5-tuple match support).
Common use-cases of Policy based routing in the data center require
5-tuple match. The last 2 patches in the series add a call to flow dissect
in the fwd path if required by the installed fib rules (controlled by a flag).

Roopa Prabhu (4):
  ipv4: fib_rules: support match on sport, dport and ip proto
  ipv6: fib6_rules: support for match on sport, dport and ip proto
  ipv4: route: dissect flow in input path if fib rules need it
  ipv6: route: dissect flow in input path if fib rules need it

------
sending this as RFC for first review (as net-next is closed)

 include/net/ip6_route.h        |  3 ++-
 include/net/ip_fib.h           |  2 +-
 include/net/netns/ipv4.h       |  1 +
 include/net/netns/ipv6.h       |  1 +
 include/uapi/linux/fib_rules.h |  3 +++
 net/ipv4/fib_rules.c           | 55 ++++++++++++++++++++++++++++++++++++++--
 net/ipv4/fib_semantics.c       |  2 +-
 net/ipv4/route.c               | 46 ++++++++++++++++++++++++++--------
 net/ipv6/fib6_rules.c          | 57 +++++++++++++++++++++++++++++++++++++++---
 net/ipv6/icmp.c                |  2 +-
 net/ipv6/route.c               | 46 +++++++++++++++++++++++++++-------
 11 files changed, 189 insertions(+), 29 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
@ 2018-02-11 22:26 ` Roopa Prabhu
  2018-02-12 20:49   ` Eric Dumazet
  2018-02-11 22:26 ` [PATCH RFC net-next 2/4] ipv6: fib6_rules: support for " Roopa Prabhu
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-11 22:26 UTC (permalink / raw)
  To: netdev; +Cc: dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Add support to match on src port, dst port and ip protocol.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/uapi/linux/fib_rules.h |  3 +++
 net/ipv4/fib_rules.c           | 46 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf..79ff3c2 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -58,6 +58,9 @@ enum {
 	FRA_PAD,
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
 	FRA_UID_RANGE,	/* UID range */
+	FRA_PROTO,	/* ip proto */
+	FRA_SPORT,	/* sport */
+	FRA_DPORT,	/* dport */
 	__FRA_MAX
 };
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a..f204c85 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -45,13 +45,17 @@ struct fib4_rule {
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	u32			tclassid;
 #endif
+	__be16			sport;
+	__be16			dport;
+	u8			proto;
 };
 
 static bool fib4_rule_matchall(const struct fib_rule *rule)
 {
 	struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
 
-	if (r->dst_len || r->src_len || r->tos)
+	if (r->dst_len || r->src_len || r->tos || r->proto || r->sport ||
+	    r->dport)
 		return false;
 	return fib_rule_matchall(rule);
 }
@@ -182,6 +186,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (r->tos && (r->tos != fl4->flowi4_tos))
 		return 0;
 
+	if (r->proto && (r->proto != fl4->flowi4_proto))
+		return 0;
+
+	if (r->sport && (r->sport != fl4->fl4_sport))
+		return 0;
+
+	if (r->dport && (r->dport != fl4->fl4_dport))
+		return 0;
+
 	return 1;
 }
 
@@ -243,6 +256,14 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			net->ipv4.fib_num_tclassid_users++;
 	}
 #endif
+	if (tb[FRA_PROTO])
+		rule4->proto = nla_get_u8(tb[FRA_PROTO]);
+
+	if (tb[FRA_DPORT])
+		rule4->dport = nla_get_be16(tb[FRA_DPORT]);
+
+	if (tb[FRA_SPORT])
+		rule4->sport = nla_get_be16(tb[FRA_SPORT]);
 
 	rule4->src_len = frh->src_len;
 	rule4->srcmask = inet_make_mask(rule4->src_len);
@@ -301,6 +322,15 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST])))
 		return 0;
 
+	if (tb[FRA_PROTO] && (rule4->proto != nla_get_u8(tb[FRA_PROTO])))
+		return 0;
+
+	if (tb[FRA_SPORT] && (rule4->sport != nla_get_be16(tb[FRA_SPORT])))
+		return 0;
+
+	if (tb[FRA_DPORT] && (rule4->dport != nla_get_be16(tb[FRA_DPORT])))
+		return 0;
+
 	return 1;
 }
 
@@ -323,6 +353,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	    nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
 		goto nla_put_failure;
 #endif
+	if (rule4->proto &&
+	    nla_put_u8(skb, FRA_PROTO, rule4->proto))
+		goto nla_put_failure;
+	if (rule4->sport &&
+	    nla_put_be16(skb, FRA_SPORT, rule4->sport))
+		goto nla_put_failure;
+	if (rule4->dport &&
+	    nla_put_be16(skb, FRA_DPORT, rule4->dport))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -333,7 +372,10 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 {
 	return nla_total_size(4) /* dst */
 	       + nla_total_size(4) /* src */
-	       + nla_total_size(4); /* flow */
+	       + nla_total_size(4) /* flow */
+	       + nla_total_size(1) /* proto */
+	       + nla_total_size(2) /* sport */
+	       + nla_total_size(2); /* dport */
 }
 
 static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC net-next 2/4] ipv6: fib6_rules: support for match on sport, dport and ip proto
  2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
@ 2018-02-11 22:26 ` Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 3/4] ipv4: route: dissect flow in input path if fib rules need it Roopa Prabhu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-11 22:26 UTC (permalink / raw)
  To: netdev; +Cc: dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Add support to match on src port, dst port and ip protocol.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 net/ipv6/fib6_rules.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24..57c0836 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -28,13 +28,17 @@ struct fib6_rule {
 	struct rt6key		src;
 	struct rt6key		dst;
 	u8			tclass;
+	u8			proto;
+	__be16			sport;
+	__be16			dport;
 };
 
 static bool fib6_rule_matchall(const struct fib_rule *rule)
 {
 	struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
 
-	if (r->dst.plen || r->src.plen || r->tclass)
+	if (r->dst.plen || r->src.plen || r->tclass || r->proto || r->sport ||
+	    r->dport)
 		return false;
 	return fib_rule_matchall(rule);
 }
@@ -223,6 +227,15 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
 		return 0;
 
+	if (r->proto && r->proto != fl6->flowi6_proto)
+		return 0;
+
+	if (r->sport && r->sport != fl6->fl6_sport)
+		return 0;
+
+	if (r->dport && r->dport != fl6->fl6_dport)
+		return 0;
+
 	return 1;
 }
 
@@ -258,6 +271,15 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
 
+	if (tb[FRA_PROTO])
+		rule6->proto = nla_get_u8(tb[FRA_PROTO]);
+
+	if (tb[FRA_SPORT])
+		rule6->sport = nla_get_be16(tb[FRA_SPORT]);
+
+	if (tb[FRA_DPORT])
+		rule6->dport = nla_get_be16(tb[FRA_DPORT]);
+
 	net->ipv6.fib6_has_custom_rules = true;
 	err = 0;
 errout:
@@ -286,6 +308,18 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
 		return 0;
 
+	if (tb[FRA_PROTO] &&
+	    (rule6->proto != nla_get_u8(tb[FRA_PROTO])))
+		return 0;
+
+	if (tb[FRA_SPORT] &&
+	    (rule6->sport != nla_get_be32(tb[FRA_SPORT])))
+		return 0;
+
+	if (tb[FRA_DPORT] &&
+	    (rule6->dport != nla_get_be32(tb[FRA_DPORT])))
+		return 0;
+
 	return 1;
 }
 
@@ -301,7 +335,13 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	if ((rule6->dst.plen &&
 	     nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
 	    (rule6->src.plen &&
-	     nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr)))
+	     nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr)) ||
+	    (rule6->proto &&
+		nla_put_u8(skb, FRA_PROTO, rule6->proto)) ||
+	    (rule6->sport &&
+		nla_put_be16(skb, FRA_SPORT, rule6->sport)) ||
+	    (rule6->dport &&
+		nla_put_be16(skb, FRA_DPORT, rule6->dport)))
 		goto nla_put_failure;
 	return 0;
 
@@ -312,7 +352,10 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 {
 	return nla_total_size(16) /* dst */
-	       + nla_total_size(16); /* src */
+	       + nla_total_size(16) /* src */
+	       + nla_total_size(1) /* proto */
+	       + nla_total_size(2) /* sport */
+	       + nla_total_size(2); /* dport */
 }
 
 static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC net-next 3/4] ipv4: route: dissect flow in input path if fib rules need it
  2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 2/4] ipv6: fib6_rules: support for " Roopa Prabhu
@ 2018-02-11 22:26 ` Roopa Prabhu
  2018-02-11 22:26 ` [PATCH RFC net-next 4/4] ipv6: " Roopa Prabhu
  2018-02-12 20:36 ` [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match David Miller
  4 siblings, 0 replies; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-11 22:26 UTC (permalink / raw)
  To: netdev; +Cc: dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Dissect flow in fwd path if fib rules require it. Controlled by
a flag to avoid penatly for the common case. Flag is set when fib
rules with sport, dport and proto match that require flow dissect
are installed. Also passes the dissected hash keys to the multipath
hash function when applicable to avoid dissecting the flow again.
icmp packets will continue to use inner header for hash
calculations (Thanks to Nikolay Aleksandrov for some review here).

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/net/ip_fib.h     |  2 +-
 include/net/netns/ipv4.h |  1 +
 net/ipv4/fib_rules.c     | 13 ++++++++++---
 net/ipv4/fib_semantics.c |  2 +-
 net/ipv4/route.c         | 49 +++++++++++++++++++++++++++++++++++-------------
 5 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f805243..5ada772 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -371,7 +371,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-		       const struct sk_buff *skb);
+		       const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c2..87b8fdc 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -52,6 +52,7 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
+	bool			fib_rules_require_fldissect;
 	struct fib_table __rcu	*fib_main;
 	struct fib_table __rcu	*fib_default;
 #endif
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f204c85..fb6a637 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -256,14 +256,20 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			net->ipv4.fib_num_tclassid_users++;
 	}
 #endif
-	if (tb[FRA_PROTO])
+	if (tb[FRA_PROTO]) {
 		rule4->proto = nla_get_u8(tb[FRA_PROTO]);
+		net->ipv4.fib_rules_require_fldissect = true;
+	}
 
-	if (tb[FRA_DPORT])
+	if (tb[FRA_DPORT]) {
 		rule4->dport = nla_get_be16(tb[FRA_DPORT]);
+		net->ipv4.fib_rules_require_fldissect = true;
+	}
 
-	if (tb[FRA_SPORT])
+	if (tb[FRA_SPORT]) {
 		rule4->sport = nla_get_be16(tb[FRA_SPORT]);
+		net->ipv4.fib_rules_require_fldissect = true;
+	}
 
 	rule4->src_len = frh->src_len;
 	rule4->srcmask = inet_make_mask(rule4->src_len);
@@ -431,6 +437,7 @@ int __net_init fib4_rules_init(struct net *net)
 		goto fail;
 	net->ipv4.rules_ops = ops;
 	net->ipv4.fib_has_custom_rules = false;
+	net->ipv4.fib_rules_require_fldissect = false;
 	return 0;
 
 fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c586597..88f66e6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1767,7 +1767,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi->fib_nhs > 1 && oif_check) {
-		int h = fib_multipath_hash(res->fi, fl4, skb);
+		int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
 
 		fib_select_multipath(res, h);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1..8889d5e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1798,7 +1798,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-		       const struct sk_buff *skb)
+		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
 	struct net *net = fi->fib_net;
 	struct flow_keys hash_keys;
@@ -1825,12 +1825,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 			if (skb->l4_hash)
 				return skb_get_hash_raw(skb) >> 1;
 			memset(&hash_keys, 0, sizeof(hash_keys));
-			skb_flow_dissect_flow_keys(skb, &keys, flag);
-			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-			hash_keys.ports.src = keys.ports.src;
-			hash_keys.ports.dst = keys.ports.dst;
-			hash_keys.basic.ip_proto = keys.basic.ip_proto;
+			if (flkeys) {
+				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+				hash_keys.ports.src = flkeys->ports.src;
+				hash_keys.ports.dst = flkeys->ports.dst;
+				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+			} else {
+				skb_flow_dissect_flow_keys(skb, &keys, flag);
+				hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+				hash_keys.ports.src = keys.ports.src;
+				hash_keys.ports.dst = keys.ports.dst;
+				hash_keys.basic.ip_proto = keys.basic.ip_proto;
+			}
 		} else {
 			memset(&hash_keys, 0, sizeof(hash_keys));
 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1852,11 +1860,12 @@ EXPORT_SYMBOL_GPL(fib_multipath_hash);
 static int ip_mkroute_input(struct sk_buff *skb,
 			    struct fib_result *res,
 			    struct in_device *in_dev,
-			    __be32 daddr, __be32 saddr, u32 tos)
+			    __be32 daddr, __be32 saddr, u32 tos,
+			    struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi && res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(res->fi, NULL, skb);
+		int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
 
 		fib_select_multipath(res, h);
 	}
@@ -1882,13 +1891,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       struct fib_result *res)
 {
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct flow_keys *flkeys = NULL, _flkeys;
+	struct net    *net = dev_net(dev);
 	struct ip_tunnel_info *tun_info;
-	struct flowi4	fl4;
+	int		err = -EINVAL;
 	unsigned int	flags = 0;
 	u32		itag = 0;
 	struct rtable	*rth;
-	int		err = -EINVAL;
-	struct net    *net = dev_net(dev);
+	struct flowi4	fl4;
 	bool do_cache;
 
 	/* IP on this device is disabled. */
@@ -1947,6 +1957,19 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (net->ipv4.fib_rules_require_fldissect) {
+		unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+		memset(&_flkeys, 0, sizeof(_flkeys));
+		skb_flow_dissect_flow_keys(skb, &_flkeys, flag);
+		fl4.fl4_sport = _flkeys.ports.src;
+		fl4.fl4_dport = _flkeys.ports.dst;
+		fl4.flowi4_proto = _flkeys.basic.ip_proto;
+		flkeys = &_flkeys;
+	}
+#endif
 	err = fib_lookup(net, &fl4, res, 0);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
@@ -1972,7 +1995,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res->type != RTN_UNICAST)
 		goto martian_destination;
 
-	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:	return err;
 
 brd_input:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC net-next 4/4] ipv6: route: dissect flow in input path if fib rules need it
  2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
                   ` (2 preceding siblings ...)
  2018-02-11 22:26 ` [PATCH RFC net-next 3/4] ipv4: route: dissect flow in input path if fib rules need it Roopa Prabhu
@ 2018-02-11 22:26 ` Roopa Prabhu
  2018-02-12  7:44   ` Ido Schimmel
  2018-02-12 20:36 ` [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match David Miller
  4 siblings, 1 reply; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-11 22:26 UTC (permalink / raw)
  To: netdev; +Cc: dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Dissect flow in fwd path if fib rules require it. Controlled by
a flag to avoid penatly for the common case. Flag is set when fib
rules with sport, dport and proto match that require flow dissect
are installed. Also passes the dissected hash keys to the multipath
hash function when applicable to avoid dissecting the flow again.
icmp packets will continue to use inner header for hash
calculations.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/net/ip6_route.h  |  3 ++-
 include/net/netns/ipv6.h |  1 +
 net/ipv6/fib6_rules.c    | 12 +++++++++---
 net/ipv6/icmp.c          |  2 +-
 net/ipv6/route.c         | 45 ++++++++++++++++++++++++++++++++++++---------
 5 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a6..218f89c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 			    const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+		       struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc45..7aca00e 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -72,6 +72,7 @@ struct netns_ipv6 {
 	unsigned long		 ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	bool			 fib6_has_custom_rules;
+	bool			 fib6_rules_require_fldissect;
 	struct rt6_info         *ip6_prohibit_entry;
 	struct rt6_info         *ip6_blk_hole_entry;
 	struct fib6_table       *fib6_local_tbl;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 57c0836..68b946d 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -271,14 +271,20 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
 
-	if (tb[FRA_PROTO])
+	if (tb[FRA_PROTO]) {
 		rule6->proto = nla_get_u8(tb[FRA_PROTO]);
+		net->ipv6.fib6_rules_require_fldissect = true;
+	}
 
-	if (tb[FRA_SPORT])
+	if (tb[FRA_SPORT]) {
 		rule6->sport = nla_get_be16(tb[FRA_SPORT]);
+		net->ipv6.fib6_rules_require_fldissect = true;
+	}
 
-	if (tb[FRA_DPORT])
+	if (tb[FRA_DPORT]) {
 		rule6->dport = nla_get_be16(tb[FRA_DPORT]);
+		net->ipv6.fib6_rules_require_fldissect = true;
+	}
 
 	net->ipv6.fib6_has_custom_rules = true;
 	err = 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3..6997b2c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
-	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+	fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadd..742c312 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
-		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+		fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
 		return match;
@@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
-				  struct flow_keys *keys)
+				  struct flow_keys *keys,
+				  struct flow_keys *flkeys)
 {
 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
 	const struct ipv6hdr *key_iph = outer_iph;
+	struct flow_keys *_flkeys = flkeys;
 	const struct ipv6hdr *inner_iph;
 	const struct icmp6hdr *icmph;
 	struct ipv6hdr _inner_iph;
@@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
 		goto out;
 
 	key_iph = inner_iph;
+	_flkeys = NULL;
 out:
 	memset(keys, 0, sizeof(*keys));
 	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys->addrs.v6addrs.src = key_iph->saddr;
-	keys->addrs.v6addrs.dst = key_iph->daddr;
-	keys->tags.flow_label = ip6_flowinfo(key_iph);
-	keys->basic.ip_proto = key_iph->nexthdr;
+	if (_flkeys) {
+		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+		keys->tags.flow_label = _flkeys->tags.flow_label;
+		keys->basic.ip_proto = _flkeys->basic.ip_proto;
+	} else {
+		keys->addrs.v6addrs.src = key_iph->saddr;
+		keys->addrs.v6addrs.dst = key_iph->daddr;
+		keys->tags.flow_label = ip6_flowinfo(key_iph);
+		keys->basic.ip_proto = key_iph->nexthdr;
+	}
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+		       struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
 
 	if (skb) {
-		ip6_multipath_l3_keys(skb, &hash_keys);
+		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
 		return flow_hash_from_keys(&hash_keys) >> 1;
 	}
 
@@ -1847,12 +1858,27 @@ void ip6_route_input(struct sk_buff *skb)
 		.flowi6_mark = skb->mark,
 		.flowi6_proto = iph->nexthdr,
 	};
+	struct flow_keys *flkeys = NULL, _flkeys;
 
 	tun_info = skb_tunnel_info(skb);
 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	if (net->ipv6.fib6_rules_require_fldissect) {
+		unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+		memset(&_flkeys, 0, sizeof(_flkeys));
+		skb_flow_dissect_flow_keys(skb, &_flkeys, flag);
+		fl6.fl6_sport = _flkeys.ports.src;
+		fl6.fl6_dport = _flkeys.ports.dst;
+		fl6.flowi6_proto = _flkeys.basic.ip_proto;
+		flkeys = &_flkeys;
+	}
+#endif
+
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+		fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
 	skb_dst_drop(skb);
 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 }
@@ -4903,6 +4929,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.fib6_has_custom_rules = false;
+	net->ipv6.fib6_rules_require_fldissect = false;
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
 					       sizeof(*net->ipv6.ip6_prohibit_entry),
 					       GFP_KERNEL);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 4/4] ipv6: route: dissect flow in input path if fib rules need it
  2018-02-11 22:26 ` [PATCH RFC net-next 4/4] ipv6: " Roopa Prabhu
@ 2018-02-12  7:44   ` Ido Schimmel
  2018-02-12 16:21     ` Roopa Prabhu
  0 siblings, 1 reply; 18+ messages in thread
From: Ido Schimmel @ 2018-02-12  7:44 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: netdev, dsa, nikolay

On Sun, Feb 11, 2018 at 02:26:29PM -0800, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> Dissect flow in fwd path if fib rules require it. Controlled by
> a flag to avoid penatly for the common case. Flag is set when fib
> rules with sport, dport and proto match that require flow dissect
> are installed. Also passes the dissected hash keys to the multipath
> hash function when applicable to avoid dissecting the flow again.
> icmp packets will continue to use inner header for hash
> calculations.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/net/ip6_route.h  |  3 ++-
>  include/net/netns/ipv6.h |  1 +
>  net/ipv6/fib6_rules.c    | 12 +++++++++---
>  net/ipv6/icmp.c          |  2 +-
>  net/ipv6/route.c         | 45 ++++++++++++++++++++++++++++++++++++---------
>  5 files changed, 49 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
> index 27d23a6..218f89c 100644
> --- a/include/net/ip6_route.h
> +++ b/include/net/ip6_route.h
> @@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
>  
>  struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
>  			    const struct in6_addr *saddr, int oif, int flags);
> -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
> +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
> +		       struct flow_keys *hkeys);
>  
>  struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
>  
> diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
> index 987cc45..7aca00e 100644
> --- a/include/net/netns/ipv6.h
> +++ b/include/net/netns/ipv6.h
> @@ -72,6 +72,7 @@ struct netns_ipv6 {
>  	unsigned long		 ip6_rt_last_gc;
>  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
>  	bool			 fib6_has_custom_rules;
> +	bool			 fib6_rules_require_fldissect;
>  	struct rt6_info         *ip6_prohibit_entry;
>  	struct rt6_info         *ip6_blk_hole_entry;
>  	struct fib6_table       *fib6_local_tbl;

[...]

> @@ -1847,12 +1858,27 @@ void ip6_route_input(struct sk_buff *skb)
>  		.flowi6_mark = skb->mark,
>  		.flowi6_proto = iph->nexthdr,
>  	};
> +	struct flow_keys *flkeys = NULL, _flkeys;
>  
>  	tun_info = skb_tunnel_info(skb);
>  	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
>  		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
> +
> +#ifdef CONFIG_IP_MULTIPLE_TABLES

s/CONFIG_IP_MULTIPLE_TABLES/CONFIG_IPV6_MULTIPLE_TABLES/ ?

> +	if (net->ipv6.fib6_rules_require_fldissect) {
> +		unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> +
> +		memset(&_flkeys, 0, sizeof(_flkeys));
> +		skb_flow_dissect_flow_keys(skb, &_flkeys, flag);
> +		fl6.fl6_sport = _flkeys.ports.src;
> +		fl6.fl6_dport = _flkeys.ports.dst;
> +		fl6.flowi6_proto = _flkeys.basic.ip_proto;
> +		flkeys = &_flkeys;
> +	}
> +#endif

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 4/4] ipv6: route: dissect flow in input path if fib rules need it
  2018-02-12  7:44   ` Ido Schimmel
@ 2018-02-12 16:21     ` Roopa Prabhu
  0 siblings, 0 replies; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-12 16:21 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, David Ahern, Nikolay Aleksandrov

On Sun, Feb 11, 2018 at 11:44 PM, Ido Schimmel <idosch@idosch.org> wrote:
> On Sun, Feb 11, 2018 at 02:26:29PM -0800, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> Dissect flow in fwd path if fib rules require it. Controlled by
>> a flag to avoid penatly for the common case. Flag is set when fib
>> rules with sport, dport and proto match that require flow dissect
>> are installed. Also passes the dissected hash keys to the multipath
>> hash function when applicable to avoid dissecting the flow again.
>> icmp packets will continue to use inner header for hash
>> calculations.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>>  include/net/ip6_route.h  |  3 ++-
>>  include/net/netns/ipv6.h |  1 +
>>  net/ipv6/fib6_rules.c    | 12 +++++++++---
>>  net/ipv6/icmp.c          |  2 +-
>>  net/ipv6/route.c         | 45 ++++++++++++++++++++++++++++++++++++---------
>>  5 files changed, 49 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>> index 27d23a6..218f89c 100644
>> --- a/include/net/ip6_route.h
>> +++ b/include/net/ip6_route.h
>> @@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
>>
>>  struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
>>                           const struct in6_addr *saddr, int oif, int flags);
>> -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
>> +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
>> +                    struct flow_keys *hkeys);
>>
>>  struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
>>
>> diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
>> index 987cc45..7aca00e 100644
>> --- a/include/net/netns/ipv6.h
>> +++ b/include/net/netns/ipv6.h
>> @@ -72,6 +72,7 @@ struct netns_ipv6 {
>>       unsigned long            ip6_rt_last_gc;
>>  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
>>       bool                     fib6_has_custom_rules;
>> +     bool                     fib6_rules_require_fldissect;
>>       struct rt6_info         *ip6_prohibit_entry;
>>       struct rt6_info         *ip6_blk_hole_entry;
>>       struct fib6_table       *fib6_local_tbl;
>
> [...]
>
>> @@ -1847,12 +1858,27 @@ void ip6_route_input(struct sk_buff *skb)
>>               .flowi6_mark = skb->mark,
>>               .flowi6_proto = iph->nexthdr,
>>       };
>> +     struct flow_keys *flkeys = NULL, _flkeys;
>>
>>       tun_info = skb_tunnel_info(skb);
>>       if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
>>               fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
>> +
>> +#ifdef CONFIG_IP_MULTIPLE_TABLES
>
> s/CONFIG_IP_MULTIPLE_TABLES/CONFIG_IPV6_MULTIPLE_TABLES/ ?
>

ack, will fix,  thanks ido.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match
  2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
                   ` (3 preceding siblings ...)
  2018-02-11 22:26 ` [PATCH RFC net-next 4/4] ipv6: " Roopa Prabhu
@ 2018-02-12 20:36 ` David Miller
  4 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2018-02-12 20:36 UTC (permalink / raw)
  To: roopa; +Cc: netdev, dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sun, 11 Feb 2018 14:26:25 -0800

> This series extends fib rule match support to include sport, dport
> and ip proto match (to complete the 5-tuple match support).
> Common use-cases of Policy based routing in the data center require
> 5-tuple match. The last 2 patches in the series add a call to flow dissect
> in the fwd path if required by the installed fib rules (controlled by a flag).

No major objections from me.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-11 22:26 ` [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
@ 2018-02-12 20:49   ` Eric Dumazet
  2018-02-12 21:05     ` David Miller
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2018-02-12 20:49 UTC (permalink / raw)
  To: Roopa Prabhu, netdev; +Cc: dsa, nikolay

On Sun, 2018-02-11 at 14:26 -0800, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> Add support to match on src port, dst port and ip protocol.
> 
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  include/uapi/linux/fib_rules.h |  3 +++
>  net/ipv4/fib_rules.c           | 46 ++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 47 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
> index 2b642bf..79ff3c2 100644
> --- a/include/uapi/linux/fib_rules.h
> +++ b/include/uapi/linux/fib_rules.h
> @@ -58,6 +58,9 @@ enum {
>  	FRA_PAD,
>  	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
>  	FRA_UID_RANGE,	/* UID range */
> +	FRA_PROTO,	/* ip proto */
> +	FRA_SPORT,	/* sport */
> +	FRA_DPORT,	/* dport */
>  	__FRA_MAX
>  };
>  
> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
> index 35d646a..f204c85 100644
> --- a/net/ipv4/fib_rules.c
> +++ b/net/ipv4/fib_rules.c
> @@ -45,13 +45,17 @@ struct fib4_rule {
>  #ifdef CONFIG_IP_ROUTE_CLASSID
>  	u32			tclassid;
>  #endif
> +	__be16			sport;
> +	__be16			dport;
> +	u8			proto;
>  };
>  
>  static bool fib4_rule_matchall(const struct fib_rule *rule)
>  {
>  	struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
>  
> -	if (r->dst_len || r->src_len || r->tos)
> +	if (r->dst_len || r->src_len || r->tos || r->proto || r->sport ||
> +	    r->dport)
>  		return false;
>  	return fib_rule_matchall(rule);
>  }
> @@ -182,6 +186,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
>  	if (r->tos && (r->tos != fl4->flowi4_tos))
>  		return 0;
>  
> +	if (r->proto && (r->proto != fl4->flowi4_proto))
> +		return 0;
> +
> +	if (r->sport && (r->sport != fl4->fl4_sport))
> +		return 0;
> +
> +	if (r->dport && (r->dport != fl4->fl4_dport))
> +		return 0;
> +

Any setup with about 20 rules to be evaluated (per packet cost) will
feel the pain...

I wonder if we could JIT/eBPF this thing.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-12 20:49   ` Eric Dumazet
@ 2018-02-12 21:05     ` David Miller
  2018-02-12 21:07       ` David Ahern
  2018-02-12 21:54       ` Eric Dumazet
  0 siblings, 2 replies; 18+ messages in thread
From: David Miller @ 2018-02-12 21:05 UTC (permalink / raw)
  To: eric.dumazet; +Cc: roopa, netdev, dsa, nikolay

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 12 Feb 2018 12:49:33 -0800

> Any setup with about 20 rules to be evaluated (per packet cost) will
> feel the pain...
> 
> I wonder if we could JIT/eBPF this thing.

That's true for the software implementation angle.

But I bet anyone actually using this thing will get it hardware
offloaded.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-12 21:05     ` David Miller
@ 2018-02-12 21:07       ` David Ahern
  2018-02-12 21:54       ` Eric Dumazet
  1 sibling, 0 replies; 18+ messages in thread
From: David Ahern @ 2018-02-12 21:07 UTC (permalink / raw)
  To: David Miller, eric.dumazet; +Cc: roopa, netdev, nikolay

On 2/12/18 2:05 PM, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 12 Feb 2018 12:49:33 -0800
> 
>> Any setup with about 20 rules to be evaluated (per packet cost) will
>> feel the pain...
>>
>> I wonder if we could JIT/eBPF this thing.

I had the same comments -- overhead and ebpf and rules, but ...

> 
> That's true for the software implementation angle.
> 
> But I bet anyone actually using this thing will get it hardware
> offloaded.
> 

what Dave said. ;-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-12 21:05     ` David Miller
  2018-02-12 21:07       ` David Ahern
@ 2018-02-12 21:54       ` Eric Dumazet
  2018-02-12 23:03         ` David Ahern
  2018-02-13  0:03         ` David Miller
  1 sibling, 2 replies; 18+ messages in thread
From: Eric Dumazet @ 2018-02-12 21:54 UTC (permalink / raw)
  To: David Miller; +Cc: roopa, netdev, dsa, nikolay

On Mon, 2018-02-12 at 16:05 -0500, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 12 Feb 2018 12:49:33 -0800
> 
> > Any setup with about 20 rules to be evaluated (per packet cost) will
> > feel the pain...
> > 
> > I wonder if we could JIT/eBPF this thing.
> 
> That's true for the software implementation angle.
> 
> But I bet anyone actually using this thing will get it hardware
> offloaded.

I wish :)

We had project/teams using different routing tables for each vlan they
setup :/

Setups with tunnels are doubly impacted, it is really easy to reach 20
evaluated rules per incoming and outgoing packet.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-12 21:54       ` Eric Dumazet
@ 2018-02-12 23:03         ` David Ahern
  2018-02-13  0:03         ` David Miller
  1 sibling, 0 replies; 18+ messages in thread
From: David Ahern @ 2018-02-12 23:03 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: roopa, netdev, nikolay

On 2/12/18 2:54 PM, Eric Dumazet wrote:
> On Mon, 2018-02-12 at 16:05 -0500, David Miller wrote:
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Mon, 12 Feb 2018 12:49:33 -0800
>>
>>> Any setup with about 20 rules to be evaluated (per packet cost) will
>>> feel the pain...
>>>
>>> I wonder if we could JIT/eBPF this thing.
>>
>> That's true for the software implementation angle.
>>
>> But I bet anyone actually using this thing will get it hardware
>> offloaded.
> 
> I wish :)
> 
> We had project/teams using different routing tables for each vlan they
> setup :/

VRF per VLAN, only 1 rule needed

> 
> Setups with tunnels are doubly impacted, it is really easy to reach 20
> evaluated rules per incoming and outgoing packet.
> 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-12 21:54       ` Eric Dumazet
  2018-02-12 23:03         ` David Ahern
@ 2018-02-13  0:03         ` David Miller
  2018-02-13 16:19           ` Roopa Prabhu
  2018-02-13 18:21           ` Ben Greear
  1 sibling, 2 replies; 18+ messages in thread
From: David Miller @ 2018-02-13  0:03 UTC (permalink / raw)
  To: eric.dumazet; +Cc: roopa, netdev, dsa, nikolay

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 12 Feb 2018 13:54:59 -0800

> We had project/teams using different routing tables for each vlan they
> setup :/

Indeed, people use FIB rules and think they can scale in software.  As
currently implemented, they can't.

The example you give sounds possibly like a great VRF use case btw :-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-13  0:03         ` David Miller
@ 2018-02-13 16:19           ` Roopa Prabhu
  2018-02-13 16:34             ` David Miller
  2018-02-13 18:21           ` Ben Greear
  1 sibling, 1 reply; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-13 16:19 UTC (permalink / raw)
  To: David Miller; +Cc: Eric Dumazet, netdev, David Ahern, Nikolay Aleksandrov

On Mon, Feb 12, 2018 at 4:03 PM, David Miller <davem@davemloft.net> wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 12 Feb 2018 13:54:59 -0800
>
>> We had project/teams using different routing tables for each vlan they
>> setup :/
>
> Indeed, people use FIB rules and think they can scale in software.  As
> currently implemented, they can't.
>
> The example you give sounds possibly like a great VRF use case btw :-)



thanks for all the feedback so far.


replying to all the discussions on this thread so far here :):
- like davidA mentioned, we will be hardware offloading this. And the
most common hw offload case requires a 5-tuple match

- This series just extends the existing match options for people to
use the existing api if they choose too (with a performance penalty)
         - the main problem was telling people ...'oh, u cannot use ip
rules just because it does not support match on sport and so on and
there is no other way to do policy based routing on Linux'

- Regardless of this series, I think we should optimize ip rules or
have a new implementation of policy based routing. happy to hear about
possible options here:
         - optimize the existing implementation (have there been
previous discussions on possible options ?)
         - @netdev2.2, I did outline a possible option for tc to
provide an l3 hook for policy based routing (people were ok with this
and I was told more tc hooks were in the works).
            tc was a choice mainly because of all its existing match
options (flower for example). Will that help ?
         - We should have an ebpf accelerated implementation
regardless for people to use it if they want to scale rules

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-13 16:19           ` Roopa Prabhu
@ 2018-02-13 16:34             ` David Miller
  2018-02-13 16:36               ` Roopa Prabhu
  0 siblings, 1 reply; 18+ messages in thread
From: David Miller @ 2018-02-13 16:34 UTC (permalink / raw)
  To: roopa; +Cc: eric.dumazet, netdev, dsa, nikolay

From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 13 Feb 2018 08:19:35 -0800

> - Regardless of this series, I think we should optimize ip rules or
> have a new implementation of policy based routing. happy to hear about
> possible options here:
>          - optimize the existing implementation (have there been
> previous discussions on possible options ?)
>          - @netdev2.2, I did outline a possible option for tc to
> provide an l3 hook for policy based routing (people were ok with this
> and I was told more tc hooks were in the works).
>             tc was a choice mainly because of all its existing match
> options (flower for example). Will that help ?
>          - We should have an ebpf accelerated implementation
> regardless for people to use it if they want to scale rules

The problem is that arbitrary prefixing in the rules.  That prevents
O(1) algorithms from being used to maintain the table.

If they were all just non-prefixed keys we could do a hash table or
similar.

I guess we could do a trie (on saddr) to a (on daddr) trie (like the
ipv6 subtrees do), but that's a lot of complexity just for fib rules.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-13 16:34             ` David Miller
@ 2018-02-13 16:36               ` Roopa Prabhu
  0 siblings, 0 replies; 18+ messages in thread
From: Roopa Prabhu @ 2018-02-13 16:36 UTC (permalink / raw)
  To: David Miller; +Cc: Eric Dumazet, netdev, David Ahern, Nikolay Aleksandrov

On Tue, Feb 13, 2018 at 8:34 AM, David Miller <davem@davemloft.net> wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> Date: Tue, 13 Feb 2018 08:19:35 -0800
>
>> - Regardless of this series, I think we should optimize ip rules or
>> have a new implementation of policy based routing. happy to hear about
>> possible options here:
>>          - optimize the existing implementation (have there been
>> previous discussions on possible options ?)
>>          - @netdev2.2, I did outline a possible option for tc to
>> provide an l3 hook for policy based routing (people were ok with this
>> and I was told more tc hooks were in the works).
>>             tc was a choice mainly because of all its existing match
>> options (flower for example). Will that help ?
>>          - We should have an ebpf accelerated implementation
>> regardless for people to use it if they want to scale rules
>
> The problem is that arbitrary prefixing in the rules.  That prevents
> O(1) algorithms from being used to maintain the table.
>
> If they were all just non-prefixed keys we could do a hash table or
> similar.

ah, got it.

>
> I guess we could do a trie (on saddr) to a (on daddr) trie (like the
> ipv6 subtrees do), but that's a lot of complexity just for fib rules.
>

agreed.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto
  2018-02-13  0:03         ` David Miller
  2018-02-13 16:19           ` Roopa Prabhu
@ 2018-02-13 18:21           ` Ben Greear
  1 sibling, 0 replies; 18+ messages in thread
From: Ben Greear @ 2018-02-13 18:21 UTC (permalink / raw)
  To: David Miller, eric.dumazet; +Cc: roopa, netdev, dsa, nikolay

On 02/12/2018 04:03 PM, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 12 Feb 2018 13:54:59 -0800
>
>> We had project/teams using different routing tables for each vlan they
>> setup :/
>
> Indeed, people use FIB rules and think they can scale in software.  As
> currently implemented, they can't.
>
> The example you give sounds possibly like a great VRF use case btw :-)

I'm one of those people with lots of FIB rules wishing it would scale
better, and wanting a routing table per netdev.

If there is a relatively easy suggestion to make this work better, I'd
like to give it a try.  I have not looked at VRF at all to date...

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-02-13 18:21 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-11 22:26 [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match Roopa Prabhu
2018-02-11 22:26 ` [PATCH RFC net-next 1/4] ipv4: fib_rules: support match on sport, dport and ip proto Roopa Prabhu
2018-02-12 20:49   ` Eric Dumazet
2018-02-12 21:05     ` David Miller
2018-02-12 21:07       ` David Ahern
2018-02-12 21:54       ` Eric Dumazet
2018-02-12 23:03         ` David Ahern
2018-02-13  0:03         ` David Miller
2018-02-13 16:19           ` Roopa Prabhu
2018-02-13 16:34             ` David Miller
2018-02-13 16:36               ` Roopa Prabhu
2018-02-13 18:21           ` Ben Greear
2018-02-11 22:26 ` [PATCH RFC net-next 2/4] ipv6: fib6_rules: support for " Roopa Prabhu
2018-02-11 22:26 ` [PATCH RFC net-next 3/4] ipv4: route: dissect flow in input path if fib rules need it Roopa Prabhu
2018-02-11 22:26 ` [PATCH RFC net-next 4/4] ipv6: " Roopa Prabhu
2018-02-12  7:44   ` Ido Schimmel
2018-02-12 16:21     ` Roopa Prabhu
2018-02-12 20:36 ` [PATCH RFC net-next 0/4] fib_rules: support sport, dport and proto match David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.