Netdev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH net] cls_flower: Fix the behavior using port ranges with hw-offload
@ 2019-12-03 10:40 Yoshiki Komachi
  2019-12-03 19:58 ` David Miller
  0 siblings, 1 reply; 2+ messages in thread
From: Yoshiki Komachi @ 2019-12-03 10:40 UTC (permalink / raw)
  To: David S. Miller, Jamal Hadi Salim, Cong Wang, Jiri Pirko
  Cc: Yoshiki Komachi, netdev

The recent commit 5c72299fba9d ("net: sched: cls_flower: Classify
packets using port ranges") had added filtering based on port ranges
to tc flower. However the commit missed necessary changes in hw-offload
code, so the feature gave rise to generating incorrect offloaded flow
keys in NIC.

One more detailed example is below:

$ tc qdisc add dev eth0 ingress
$ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
  dst_port 100-200 action drop

With the setup above, an exact match filter with dst_port == 0 will be
installed in NIC by hw-offload. IOW, the NIC will have a rule which is
equivalent to the following one.

$ tc qdisc add dev eth0 ingress
$ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
  dst_port 0 action drop

The behavior was caused by the flow dissector which extracts packet
data into the flow key in the tc flower. More specifically, regardless
of exact match or specified port ranges, fl_init_dissector() set the
FLOW_DISSECTOR_KEY_PORTS flag in struct flow_dissector to extract port
numbers from skb in skb_flow_dissect() called by fl_classify(). Note
that device drivers received the same struct flow_dissector object as
used in skb_flow_dissect(). Thus, offloaded drivers could not identify
which of these is used because the FLOW_DISSECTOR_KEY_PORTS flag was
set to struct flow_dissector in either case.

This patch adds the new FLOW_DISSECTOR_KEY_PORTS_RANGE flag and the new
tp_range field in struct fl_flow_key to recognize which filters are applied
to offloaded drivers. At this point, when filters based on port ranges
passed to drivers, drivers return the EOPNOTSUPP error because they do
not support the feature (the newly created FLOW_DISSECTOR_KEY_PORTS_RANGE
flag).

Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges")
Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
---
 include/net/flow_dissector.h |   1 +
 net/core/flow_dissector.c    |  37 ++++++++++----
 net/sched/cls_flower.c       | 118 ++++++++++++++++++++++++-------------------
 3 files changed, 95 insertions(+), 61 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index b8c20e9..d93017a 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -235,6 +235,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
 	FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
 	FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+	FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */
 	FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
 	FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
 	FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ca87165..69395b8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -760,6 +760,31 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
 }
 
 static void
+__skb_flow_dissect_ports(const struct sk_buff *skb,
+			 struct flow_dissector *flow_dissector,
+			 void *target_container, void *data, int nhoff,
+			 u8 ip_proto, int hlen)
+{
+	enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
+	struct flow_dissector_key_ports *key_ports;
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+		dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
+	else if (dissector_uses_key(flow_dissector,
+				    FLOW_DISSECTOR_KEY_PORTS_RANGE))
+		dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
+
+	if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
+		return;
+
+	key_ports = skb_flow_dissector_target(flow_dissector,
+					      dissector_ports,
+					      target_container);
+	key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						data, hlen);
+}
+
+static void
 __skb_flow_dissect_ipv4(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container, void *data, const struct iphdr *iph)
@@ -928,7 +953,6 @@ bool __skb_flow_dissect(const struct net *net,
 	struct flow_dissector_key_control *key_control;
 	struct flow_dissector_key_basic *key_basic;
 	struct flow_dissector_key_addrs *key_addrs;
-	struct flow_dissector_key_ports *key_ports;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
 	struct bpf_prog *attached = NULL;
@@ -1383,14 +1407,9 @@ bool __skb_flow_dissect(const struct net *net,
 		break;
 	}
 
-	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
-	    !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
-		key_ports = skb_flow_dissector_target(flow_dissector,
-						      FLOW_DISSECTOR_KEY_PORTS,
-						      target_container);
-		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
-							data, hlen);
-	}
+	if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT))
+		__skb_flow_dissect_ports(skb, flow_dissector, target_container,
+					 data, nhoff, ip_proto, hlen);
 
 	/* Process result of IP proto processing */
 	switch (fdret) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c307ee1..6c68971 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -56,8 +56,13 @@ struct fl_flow_key {
 	struct flow_dissector_key_ip ip;
 	struct flow_dissector_key_ip enc_ip;
 	struct flow_dissector_key_enc_opts enc_opts;
-	struct flow_dissector_key_ports tp_min;
-	struct flow_dissector_key_ports tp_max;
+	union {
+		struct flow_dissector_key_ports tp;
+		struct {
+			struct flow_dissector_key_ports tp_min;
+			struct flow_dissector_key_ports tp_max;
+		};
+	} tp_range;
 	struct flow_dissector_key_ct ct;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
@@ -200,19 +205,19 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
 {
 	__be16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_min.dst);
-	max_mask = htons(filter->mask->key.tp_max.dst);
-	min_val = htons(filter->key.tp_min.dst);
-	max_val = htons(filter->key.tp_max.dst);
+	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
+	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
+	min_val = htons(filter->key.tp_range.tp_min.dst);
+	max_val = htons(filter->key.tp_range.tp_max.dst);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp.dst) < min_val ||
-		    htons(key->tp.dst) > max_val)
+		if (htons(key->tp_range.tp.dst) < min_val ||
+		    htons(key->tp_range.tp.dst) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
-		mkey->tp_min.dst = filter->mkey.tp_min.dst;
-		mkey->tp_max.dst = filter->mkey.tp_max.dst;
+		mkey->tp_range.tp_min.dst = filter->mkey.tp_range.tp_min.dst;
+		mkey->tp_range.tp_max.dst = filter->mkey.tp_range.tp_max.dst;
 	}
 	return true;
 }
@@ -223,19 +228,19 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
 {
 	__be16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_min.src);
-	max_mask = htons(filter->mask->key.tp_max.src);
-	min_val = htons(filter->key.tp_min.src);
-	max_val = htons(filter->key.tp_max.src);
+	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
+	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
+	min_val = htons(filter->key.tp_range.tp_min.src);
+	max_val = htons(filter->key.tp_range.tp_max.src);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp.src) < min_val ||
-		    htons(key->tp.src) > max_val)
+		if (htons(key->tp_range.tp.src) < min_val ||
+		    htons(key->tp_range.tp.src) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
-		mkey->tp_min.src = filter->mkey.tp_min.src;
-		mkey->tp_max.src = filter->mkey.tp_max.src;
+		mkey->tp_range.tp_min.src = filter->mkey.tp_range.tp_min.src;
+		mkey->tp_range.tp_max.src = filter->mkey.tp_range.tp_max.src;
 	}
 	return true;
 }
@@ -734,23 +739,25 @@ static void fl_set_key_val(struct nlattr **tb,
 static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 				 struct fl_flow_key *mask)
 {
-	fl_set_key_val(tb, &key->tp_min.dst,
-		       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
-		       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
-	fl_set_key_val(tb, &key->tp_max.dst,
-		       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
-		       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
-	fl_set_key_val(tb, &key->tp_min.src,
-		       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
-		       TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
-	fl_set_key_val(tb, &key->tp_max.src,
-		       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
-		       TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
-
-	if ((mask->tp_min.dst && mask->tp_max.dst &&
-	     htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
-	     (mask->tp_min.src && mask->tp_max.src &&
-	      htons(key->tp_max.src) <= htons(key->tp_min.src)))
+	fl_set_key_val(tb, &key->tp_range.tp_min.dst,
+		       TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_range.tp_min.dst,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.dst));
+	fl_set_key_val(tb, &key->tp_range.tp_max.dst,
+		       TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_range.tp_max.dst,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.dst));
+	fl_set_key_val(tb, &key->tp_range.tp_min.src,
+		       TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_range.tp_min.src,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_min.src));
+	fl_set_key_val(tb, &key->tp_range.tp_max.src,
+		       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
+		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
+
+	if ((mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
+	     htons(key->tp_range.tp_max.dst) <=
+		 htons(key->tp_range.tp_min.dst)) ||
+	    (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
+	     htons(key->tp_range.tp_max.src) <=
+		 htons(key->tp_range.tp_min.src)))
 		return -EINVAL;
 
 	return 0;
@@ -1509,9 +1516,10 @@ static void fl_init_dissector(struct flow_dissector *dissector,
 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-	if (FL_KEY_IS_MASKED(mask, tp) ||
-	    FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
-		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_PORTS, tp);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_PORTS_RANGE, tp_range);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IP, ip);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -1560,8 +1568,10 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
 
 	fl_mask_copy(newmask, mask);
 
-	if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
-	    (newmask->key.tp_min.src && newmask->key.tp_max.src))
+	if ((newmask->key.tp_range.tp_min.dst &&
+	     newmask->key.tp_range.tp_max.dst) ||
+	    (newmask->key.tp_range.tp_min.src &&
+	     newmask->key.tp_range.tp_max.src))
 		newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
 
 	err = fl_init_mask_hashtable(newmask);
@@ -2159,18 +2169,22 @@ static int fl_dump_key_val(struct sk_buff *skb,
 static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
 				  struct fl_flow_key *mask)
 {
-	if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
-			    &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
-			    sizeof(key->tp_min.dst)) ||
-	    fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
-			    &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
-			    sizeof(key->tp_max.dst)) ||
-	    fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
-			    &mask->tp_min.src, TCA_FLOWER_UNSPEC,
-			    sizeof(key->tp_min.src)) ||
-	    fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
-			    &mask->tp_max.src, TCA_FLOWER_UNSPEC,
-			    sizeof(key->tp_max.src)))
+	if (fl_dump_key_val(skb, &key->tp_range.tp_min.dst,
+			    TCA_FLOWER_KEY_PORT_DST_MIN,
+			    &mask->tp_range.tp_min.dst, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_range.tp_min.dst)) ||
+	    fl_dump_key_val(skb, &key->tp_range.tp_max.dst,
+			    TCA_FLOWER_KEY_PORT_DST_MAX,
+			    &mask->tp_range.tp_max.dst, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_range.tp_max.dst)) ||
+	    fl_dump_key_val(skb, &key->tp_range.tp_min.src,
+			    TCA_FLOWER_KEY_PORT_SRC_MIN,
+			    &mask->tp_range.tp_min.src, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_range.tp_min.src)) ||
+	    fl_dump_key_val(skb, &key->tp_range.tp_max.src,
+			    TCA_FLOWER_KEY_PORT_SRC_MAX,
+			    &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC,
+			    sizeof(key->tp_range.tp_max.src)))
 		return -1;
 
 	return 0;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH net] cls_flower: Fix the behavior using port ranges with hw-offload
  2019-12-03 10:40 [PATCH net] cls_flower: Fix the behavior using port ranges with hw-offload Yoshiki Komachi
@ 2019-12-03 19:58 ` David Miller
  0 siblings, 0 replies; 2+ messages in thread
From: David Miller @ 2019-12-03 19:58 UTC (permalink / raw)
  To: komachi.yoshiki; +Cc: jhs, xiyou.wangcong, jiri, netdev

From: Yoshiki Komachi <komachi.yoshiki@gmail.com>
Date: Tue,  3 Dec 2019 19:40:12 +0900

> The recent commit 5c72299fba9d ("net: sched: cls_flower: Classify
> packets using port ranges") had added filtering based on port ranges
> to tc flower. However the commit missed necessary changes in hw-offload
> code, so the feature gave rise to generating incorrect offloaded flow
> keys in NIC.
> 
> One more detailed example is below:
> 
> $ tc qdisc add dev eth0 ingress
> $ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
>   dst_port 100-200 action drop
> 
> With the setup above, an exact match filter with dst_port == 0 will be
> installed in NIC by hw-offload. IOW, the NIC will have a rule which is
> equivalent to the following one.
> 
> $ tc qdisc add dev eth0 ingress
> $ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \
>   dst_port 0 action drop
> 
> The behavior was caused by the flow dissector which extracts packet
> data into the flow key in the tc flower. More specifically, regardless
> of exact match or specified port ranges, fl_init_dissector() set the
> FLOW_DISSECTOR_KEY_PORTS flag in struct flow_dissector to extract port
> numbers from skb in skb_flow_dissect() called by fl_classify(). Note
> that device drivers received the same struct flow_dissector object as
> used in skb_flow_dissect(). Thus, offloaded drivers could not identify
> which of these is used because the FLOW_DISSECTOR_KEY_PORTS flag was
> set to struct flow_dissector in either case.
> 
> This patch adds the new FLOW_DISSECTOR_KEY_PORTS_RANGE flag and the new
> tp_range field in struct fl_flow_key to recognize which filters are applied
> to offloaded drivers. At this point, when filters based on port ranges
> passed to drivers, drivers return the EOPNOTSUPP error because they do
> not support the feature (the newly created FLOW_DISSECTOR_KEY_PORTS_RANGE
> flag).
> 
> Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges")
> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>

Applied and queued up for -stable, thank you.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-03 10:40 [PATCH net] cls_flower: Fix the behavior using port ranges with hw-offload Yoshiki Komachi
2019-12-03 19:58 ` David Miller

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git