* [PATCH net-next 0/2] flow_dissector: Flow dissector fixes
@ 2017-08-31 22:22 Tom Herbert
  2017-08-31 22:22 ` [PATCH net-next 1/2] flow_dissector: Cleanup control flow Tom Herbert
  2017-08-31 22:22 ` [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH Tom Herbert
  0 siblings, 2 replies; 12+ messages in thread
From: Tom Herbert @ 2017-08-31 22:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, alex.popov, hannes, Tom Herbert

This patch set fixes some basic issues with the __skb_flow_dissect function.

Items addressed:
  - Clean up control flow in the function; in particular, eliminate a
    bunch of gotos and implement a simplified control flow model
  - Add limits for the number of encapsulations and extension headers
    that can be dissected

Tested:

Ran normal traffic, GUE, and VXLAN traffic.


Tom Herbert (2):
  flow_dissector: Cleanup control flow
  flow_dissector: Add limits for encapsulation and EH

 include/net/flow_dissector.h |   9 ++
 net/core/flow_dissector.c    | 267 ++++++++++++++++++++++++++++++-------------
 2 files changed, 198 insertions(+), 78 deletions(-)

-- 
2.11.0


* [PATCH net-next 1/2] flow_dissector: Cleanup control flow
  2017-08-31 22:22 [PATCH net-next 0/2] flow_dissector: Flow dissector fixes Tom Herbert
@ 2017-08-31 22:22 ` Tom Herbert
  2017-09-01 12:26   ` Simon Horman
  2017-09-01 12:35   ` Hannes Frederic Sowa
  2017-08-31 22:22 ` [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH Tom Herbert
  1 sibling, 2 replies; 12+ messages in thread
From: Tom Herbert @ 2017-08-31 22:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, alex.popov, hannes, Tom Herbert

__skb_flow_dissect is riddled with gotos that make discerning the flow,
debugging, and extending the capability difficult. This patch
reorganizes things so that we only perform gotos after the two main
switch statements (no gotos within the cases now). It also eliminates
several goto labels so that there are only two labels that can be the
target of a goto.
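
For reviewers skimming the diff, the shape of the resulting control flow
is roughly the following. This is a minimal, self-contained toy sketch of
the pattern only; the names and the userspace harness are made up, and it
is not the kernel code itself:

#include <stdio.h>

enum flow_ret { RET_OUT_GOOD, RET_OUT_BAD, RET_PROTO_AGAIN };

static enum flow_ret dissect_one(int proto, int *next_proto)
{
	/* Each case only records a result code; no gotos inside the cases. */
	switch (proto) {
	case 1:				/* stands in for an encapsulation header */
		*next_proto = 2;
		return RET_PROTO_AGAIN;
	case 2:				/* stands in for an inner header */
		return RET_OUT_GOOD;
	default:
		return RET_OUT_BAD;
	}
}

int main(void)
{
	enum flow_ret fdret;
	int proto = 1, next = 0;

proto_again:
	fdret = dissect_one(proto, &next);

	/* Single dispatch point after the switch, as in the patch. */
	switch (fdret) {
	case RET_PROTO_AGAIN:
		proto = next;
		goto proto_again;
	case RET_OUT_GOOD:
		printf("dissected ok\n");
		return 0;
	case RET_OUT_BAD:
	default:
		printf("dissect failed\n");
		return 1;
	}
}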

Reported-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
 include/net/flow_dissector.h |   9 ++
 net/core/flow_dissector.c    | 225 ++++++++++++++++++++++++++++---------------
 2 files changed, 156 insertions(+), 78 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index e2663e900b0a..c358c3ff6acc 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -19,6 +19,15 @@ struct flow_dissector_key_control {
 #define FLOW_DIS_FIRST_FRAG	BIT(1)
 #define FLOW_DIS_ENCAPSULATION	BIT(2)
 
+enum flow_dissect_ret {
+	FLOW_DISSECT_RET_OUT_GOOD,
+	FLOW_DISSECT_RET_OUT_BAD,
+	FLOW_DISSECT_RET_PROTO_AGAIN,
+	FLOW_DISSECT_RET_IPPROTO_AGAIN,
+	FLOW_DISSECT_RET_IPPROTO_AGAIN_EH,
+	FLOW_DISSECT_RET_CONTINUE,
+};
+
 /**
  * struct flow_dissector_key_basic:
  * @thoff: Transport header offset
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e2eaa1ff948d..5110180a3e96 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -115,12 +115,6 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
-enum flow_dissect_ret {
-	FLOW_DISSECT_RET_OUT_GOOD,
-	FLOW_DISSECT_RET_OUT_BAD,
-	FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
-};
-
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
@@ -341,7 +335,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 		return FLOW_DISSECT_RET_OUT_GOOD;
 
-	return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+	return FLOW_DISSECT_RET_PROTO_AGAIN;
 }
 
 static void
@@ -431,6 +425,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
+	enum flow_dissect_ret fdret;
 	bool skip_vlan = false;
 	u8 ip_proto = 0;
 	bool ret;
@@ -482,14 +477,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	}
 
 proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (proto) {
 	case htons(ETH_P_IP): {
 		const struct iphdr *iph;
 		struct iphdr _iph;
-ip:
+
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph || iph->ihl < 5)
-			goto out_bad;
+		if (!iph || iph->ihl < 5) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		nhoff += iph->ihl * 4;
 
 		ip_proto = iph->protocol;
@@ -509,19 +509,25 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
 			if (iph->frag_off & htons(IP_OFFSET)) {
-				goto out_good;
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
 			} else {
 				key_control->flags |= FLOW_DIS_FIRST_FRAG;
-				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
-					goto out_good;
+				if (!(flags &
+				      FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
+					fdret = FLOW_DISSECT_RET_OUT_GOOD;
+					break;
+				}
 			}
 		}
 
 		__skb_flow_dissect_ipv4(skb, flow_dissector,
 					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
 
 		break;
 	}
@@ -529,10 +535,11 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		const struct ipv6hdr *iph;
 		struct ipv6hdr _iph;
 
-ipv6:
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph)
-			goto out_bad;
+		if (!iph) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = iph->nexthdr;
 		nhoff += sizeof(struct ipv6hdr);
@@ -561,15 +568,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 								     target_container);
 				key_tags->flow_label = ntohl(flow_label);
 			}
-			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
-				goto out_good;
+			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
+			}
 		}
 
 		__skb_flow_dissect_ipv6(skb, flow_dissector,
 					target_container, data, iph);
 
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
 
 		break;
 	}
@@ -585,12 +594,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
-			if (!vlan)
-				goto out_bad;
+			if (!vlan) {
+				fdret = FLOW_DISSECT_RET_OUT_BAD;
+				break;
+			}
+
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan)
-				goto proto_again;
+			if (skip_vlan) {
+				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+				break;
+			}
 		}
 
 		skip_vlan = true;
@@ -613,7 +627,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			}
 		}
 
-		goto proto_again;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 	}
 	case htons(ETH_P_PPP_SES): {
 		struct {
@@ -621,18 +636,27 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			__be16 proto;
 		} *hdr, _hdr;
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		proto = hdr->proto;
 		nhoff += PPPOE_SES_HLEN;
 		switch (proto) {
 		case htons(PPP_IP):
-			goto ip;
+			proto = htons(ETH_P_IP);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		case htons(PPP_IPV6):
-			goto ipv6;
+			proto = htons(ETH_P_IPV6);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		default:
-			goto out_bad;
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
 		}
+		break;
 	}
 	case htons(ETH_P_TIPC): {
 		struct {
@@ -640,8 +664,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			__be32 srcnode;
 		} *hdr, _hdr;
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		if (dissector_uses_key(flow_dissector,
 				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
@@ -651,56 +677,63 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
 			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
 		}
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 
 	case htons(ETH_P_MPLS_UC):
 	case htons(ETH_P_MPLS_MC):
-mpls:
-		switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
 						target_container, data,
-						nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+						nhoff, hlen);
+		break;
 	case htons(ETH_P_FCOE):
-		if ((hlen - nhoff) < FCOE_HEADER_LEN)
-			goto out_bad;
+		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		nhoff += FCOE_HEADER_LEN;
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 
 	case htons(ETH_P_ARP):
 	case htons(ETH_P_RARP):
-		switch (__skb_flow_dissect_arp(skb, flow_dissector,
+		fdret = __skb_flow_dissect_arp(skb, flow_dissector,
 					       target_container, data,
-					       nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+					       nhoff, hlen);
+		break;
+
+	default:
+		fdret = FLOW_DISSECT_RET_OUT_BAD;
+		break;
+	}
+
+	/* Process result of proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_OUT_GOOD:
+		goto out_good;
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		goto proto_again;
+	case FLOW_DISSECT_RET_CONTINUE:
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN_EH:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
 	default:
 		goto out_bad;
 	}
 
 ip_proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (ip_proto) {
 	case IPPROTO_GRE:
-		switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
 					       target_container, data,
-					       &proto, &nhoff, &hlen, flags)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-			goto out_bad;
-		case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
-			goto proto_again;
-		}
+					       &proto, &nhoff, &hlen, flags);
+		break;
+
 	case NEXTHDR_HOP:
 	case NEXTHDR_ROUTING:
 	case NEXTHDR_DEST: {
@@ -711,13 +744,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 
 		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
 					      data, hlen, &_opthdr);
-		if (!opthdr)
-			goto out_bad;
+		if (!opthdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = opthdr[0];
 		nhoff += (opthdr[1] + 1) << 3;
 
-		goto ip_proto_again;
+		fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN_EH;
+		break;
 	}
 	case NEXTHDR_FRAGMENT: {
 		struct frag_hdr _fh, *fh;
@@ -728,8 +764,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
 					  data, hlen, &_fh);
 
-		if (!fh)
-			goto out_bad;
+		if (!fh) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
@@ -738,34 +776,50 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 
 		if (!(fh->frag_off & htons(IP6_OFFSET))) {
 			key_control->flags |= FLOW_DIS_FIRST_FRAG;
-			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
-				goto ip_proto_again;
+			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+				fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN_EH;
+				break;
+			}
 		}
-		goto out_good;
+
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 	case IPPROTO_IPIP:
 		proto = htons(ETH_P_IP);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 
-		goto ip;
 	case IPPROTO_IPV6:
 		proto = htons(ETH_P_IPV6);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 
-		goto ipv6;
 	case IPPROTO_MPLS:
 		proto = htons(ETH_P_MPLS_UC);
-		goto mpls;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 	case IPPROTO_TCP:
 		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
 				       data, nhoff, hlen);
 		break;
+
 	default:
 		break;
 	}
@@ -787,6 +841,21 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
 	}
 
+	/* Process result of IP proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		goto proto_again;
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN_EH:
+		goto ip_proto_again;
+	case FLOW_DISSECT_RET_OUT_GOOD:
+	case FLOW_DISSECT_RET_CONTINUE:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
+	default:
+		goto out_bad;
+	}
+
 out_good:
 	ret = true;
 
-- 
2.11.0


* [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-08-31 22:22 [PATCH net-next 0/2] flow_dissector: Flow dissector fixes Tom Herbert
  2017-08-31 22:22 ` [PATCH net-next 1/2] flow_dissector: Cleanup control flow Tom Herbert
@ 2017-08-31 22:22 ` Tom Herbert
  2017-09-01 12:22   ` Simon Horman
  2017-09-01 13:32   ` Hannes Frederic Sowa
  1 sibling, 2 replies; 12+ messages in thread
From: Tom Herbert @ 2017-08-31 22:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, alex.popov, hannes, Tom Herbert

In flow dissector there are no limits to the number of nested
encapsulations that might be dissected, which makes for a nice DOS
attack. This patch adds limits for dissecting nested encapsulations
as well as for dissecting over extension headers.
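
The intended behaviour of the encapsulation limit is easiest to see in
isolation. Below is a self-contained toy model of the counting rule the
patch uses; the names and the userspace harness are illustrative only and
are not part of the patch:

#include <stdbool.h>
#include <stdio.h>

#define MAX_ENCAPS		5
#define F_STOP_AT_ENCAP		0x1	/* stand-in for FLOW_DISSECTOR_F_STOP_AT_ENCAP */

/* Toy version of the rule: on reaching the limit, allow one more pass
 * but stop descending into further encapsulations; past it, give up.
 */
static bool encap_allowed(int *num_encaps, unsigned int *flags)
{
	++*num_encaps;

	if (*num_encaps >= MAX_ENCAPS) {
		if (*num_encaps == MAX_ENCAPS)
			*flags |= F_STOP_AT_ENCAP;
		else
			return false;
	}

	return true;
}

int main(void)
{
	unsigned int flags = 0;
	int num_encaps = 0, pass;

	for (pass = 1; pass <= MAX_ENCAPS + 2; pass++) {
		bool allowed = encap_allowed(&num_encaps, &flags);

		printf("encap %d: allowed=%d stop_at_encap=%d\n", pass,
		       allowed, !!(flags & F_STOP_AT_ENCAP));
	}

	return 0;
}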

Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
 net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5110180a3e96..1bca748de27d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
 	key_ip->ttl = iph->hop_limit;
 }
 
+/* Maximum number of nested encapsulations that can be processed in
+ * __skb_flow_dissect
+ */
+#define MAX_FLOW_DISSECT_ENCAPS	5
+
+static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
+{
+	++*num_encaps;
+
+	if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
+		if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
+			/* Allow one more pass but ignore disregard
+			 * further encapsulations
+			 */
+			*flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+		} else {
+			/* Max encaps reached */
+			return  false;
+		}
+	}
+
+	return true;
+}
+
+/* Maximum number of extension headers can be processed in __skb_flow_dissect
+ * per IPv6 packet
+ */
+#define MAX_FLOW_DISSECT_EH	5
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -426,6 +455,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
+	int num_eh, num_encaps = 0;
 	bool skip_vlan = false;
 	u8 ip_proto = 0;
 	bool ret;
@@ -714,7 +744,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	case FLOW_DISSECT_RET_OUT_GOOD:
 		goto out_good;
 	case FLOW_DISSECT_RET_PROTO_AGAIN:
-		goto proto_again;
+		if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
+			goto proto_again;
+		goto out_good;
 	case FLOW_DISSECT_RET_CONTINUE:
 	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
 	case FLOW_DISSECT_RET_IPPROTO_AGAIN_EH:
@@ -724,6 +756,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		goto out_bad;
 	}
 
+	num_eh = 0;
+
 ip_proto_again:
 	fdret = FLOW_DISSECT_RET_CONTINUE;
 
@@ -844,10 +878,18 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	/* Process result of IP proto processing */
 	switch (fdret) {
 	case FLOW_DISSECT_RET_PROTO_AGAIN:
-		goto proto_again;
+		if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
+			goto proto_again;
+		break;
 	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+		if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
+			goto ip_proto_again;
+		break;
 	case FLOW_DISSECT_RET_IPPROTO_AGAIN_EH:
-		goto ip_proto_again;
+		++num_eh;
+		if (num_eh <= MAX_FLOW_DISSECT_EH)
+			goto ip_proto_again;
+		break;
 	case FLOW_DISSECT_RET_OUT_GOOD:
 	case FLOW_DISSECT_RET_CONTINUE:
 		break;
-- 
2.11.0


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-08-31 22:22 ` [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH Tom Herbert
@ 2017-09-01 12:22   ` Simon Horman
  2017-09-01 13:32   ` Hannes Frederic Sowa
  1 sibling, 0 replies; 12+ messages in thread
From: Simon Horman @ 2017-09-01 12:22 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, alex.popov, hannes

On Thu, Aug 31, 2017 at 03:22:39PM -0700, Tom Herbert wrote:
> In flow dissector there are no limits to the number of nested
> encapsulations that might be dissected which makes for a nice DOS
> attack. This patch limits for dissecting nested encapsulations
> as well as for dissecting over extension headers.
> 
> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Tom Herbert <tom@quantonium.net>
> ---
>  net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 5110180a3e96..1bca748de27d 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
>  	key_ip->ttl = iph->hop_limit;
>  }
>  
> +/* Maximum number of nested encapsulations that can be processed in
> + * __skb_flow_dissect
> + */
> +#define MAX_FLOW_DISSECT_ENCAPS	5
> +
> +static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
> +{
> +	++*num_encaps;
> +
> +	if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
> +		if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
> +			/* Allow one more pass but ignore disregard

It seems that 'ignore' or 'disregard' should be dropped from the text above.

> +			 * further encapsulations
> +			 */
> +			*flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> +		} else {
> +			/* Max encaps reached */
> +			return  false;

There are two spaces between 'return' and 'false'.

...


* Re: [PATCH net-next 1/2] flow_dissector: Cleanup control flow
  2017-08-31 22:22 ` [PATCH net-next 1/2] flow_dissector: Cleanup control flow Tom Herbert
@ 2017-09-01 12:26   ` Simon Horman
  2017-09-01 12:35   ` Hannes Frederic Sowa
  1 sibling, 0 replies; 12+ messages in thread
From: Simon Horman @ 2017-09-01 12:26 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, alex.popov, hannes

Hi Tom,

On Thu, Aug 31, 2017 at 03:22:38PM -0700, Tom Herbert wrote:
> __skb_flow_dissect is riddled with gotos that make discerning the flow,
> debugging, and extending the capability difficult. This patch
> reorganizes things so that we only perform goto's after the two main
> switch statements (no gotos within the cases now). It also eliminates
> several goto labels so that there are only two labels that can be target
> for goto.

I agree that the flow of __skb_flow_dissect() is difficult to follow,
but it's not obvious that this significant change in terms of LOC
takes us to a better place.

Maybe it makes follow-up work easier. If so perhaps it should be motivated
along those lines.

In any case I won't stand in the way of this change but I did want to throw
my 2c worth in.

> 
> Reported-by: Alexander Popov <alex.popov@linux.com>
> Signed-off-by: Tom Herbert <tom@quantonium.net>
> ---
>  include/net/flow_dissector.h |   9 ++
>  net/core/flow_dissector.c    | 225 ++++++++++++++++++++++++++++---------------
>  2 files changed, 156 insertions(+), 78 deletions(-)
> 
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index e2663e900b0a..c358c3ff6acc 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -19,6 +19,15 @@ struct flow_dissector_key_control {
>  #define FLOW_DIS_FIRST_FRAG	BIT(1)
>  #define FLOW_DIS_ENCAPSULATION	BIT(2)
>  
> +enum flow_dissect_ret {
> +	FLOW_DISSECT_RET_OUT_GOOD,
> +	FLOW_DISSECT_RET_OUT_BAD,
> +	FLOW_DISSECT_RET_PROTO_AGAIN,
> +	FLOW_DISSECT_RET_IPPROTO_AGAIN,
> +	FLOW_DISSECT_RET_IPPROTO_AGAIN_EH,
> +	FLOW_DISSECT_RET_CONTINUE,
> +};

Minor nit:

My reading is that this patch does not seem to differentiate between the
handling of FLOW_DISSECT_RET_IPPROTO_AGAIN and
FLOW_DISSECT_RET_IPPROTO_AGAIN_EH.  Perhaps it would be better to add
FLOW_DISSECT_RET_IPPROTO_AGAIN_EH in the following patch where it is used.

> +
>  /**
>   * struct flow_dissector_key_basic:
>   * @thoff: Transport header offset

...


* Re: [PATCH net-next 1/2] flow_dissector: Cleanup control flow
  2017-08-31 22:22 ` [PATCH net-next 1/2] flow_dissector: Cleanup control flow Tom Herbert
  2017-09-01 12:26   ` Simon Horman
@ 2017-09-01 12:35   ` Hannes Frederic Sowa
  2017-09-01 16:12     ` Tom Herbert
  1 sibling, 1 reply; 12+ messages in thread
From: Hannes Frederic Sowa @ 2017-09-01 12:35 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, alex.popov

Tom Herbert <tom@quantonium.net> writes:

> __skb_flow_dissect is riddled with gotos that make discerning the flow,
> debugging, and extending the capability difficult. This patch
> reorganizes things so that we only perform goto's after the two main
> switch statements (no gotos within the cases now). It also eliminates
> several goto labels so that there are only two labels that can be target
> for goto.

The problem with the

fdret = ... ;
break;

is that we now have to count curly braces to see what the break
actually breaks out of and where fdret is then processed. With the
number of context lines you sent with the patch this is hard to review.

I tend to like the gotos a bit more for now.

[...]

Bye,
Hannes


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-08-31 22:22 ` [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH Tom Herbert
  2017-09-01 12:22   ` Simon Horman
@ 2017-09-01 13:32   ` Hannes Frederic Sowa
  2017-09-01 15:38     ` Tom Herbert
  1 sibling, 1 reply; 12+ messages in thread
From: Hannes Frederic Sowa @ 2017-09-01 13:32 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, alex.popov

Tom Herbert <tom@quantonium.net> writes:

> In flow dissector there are no limits to the number of nested
> encapsulations that might be dissected which makes for a nice DOS
> attack. This patch limits for dissecting nested encapsulations
> as well as for dissecting over extension headers.

I was actually more referring to your patch, because the flow dissector
right now is not stack recursive. Your changes would make it do
recursion on the stack. But it seems something along those lines is
needed anyway. See below.

> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Tom Herbert <tom@quantonium.net>
> ---
>  net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 5110180a3e96..1bca748de27d 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
>  	key_ip->ttl = iph->hop_limit;
>  }
>  
> +/* Maximum number of nested encapsulations that can be processed in
> + * __skb_flow_dissect
> + */
> +#define MAX_FLOW_DISSECT_ENCAPS	5

I think you can exactly parse one encapsulation layer safely. This is
because you can only keep state on one encapsulation layer protocol
right now.

For example this scenario:

I would like to circumvent tc flower rules that filter based on vlan. I
simply construct a packet looking like:

Ethernet|Vlan|IP|GRE|Ethernet|Vlan|

because we don't recurse in the flow keys either, the second vlan header
would overwrite the information of the first one.

> +
> +static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
> +{
> +	++*num_encaps;
> +
> +	if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
> +		if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
> +			/* Allow one more pass but ignore disregard
> +			 * further encapsulations
> +			 */
> +			*flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> +		} else {
> +			/* Max encaps reached */
> +			return  false;
> +		}
> +	}
> +
> +	return true;
> +}
> +
> +/* Maximum number of extension headers can be processed in __skb_flow_dissect
> + * per IPv6 packet
> + */
> +#define MAX_FLOW_DISSECT_EH	5

I would at least allow each extension header once, DEST_OPS twice, thus
let's say 12? It is safe in this version because it does not consume
stack space anyway and doesn't update internal state.

> +
>  /**
>   * __skb_flow_dissect - extract the flow_keys struct and return it
>   * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
> @@ -426,6 +455,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>  	struct flow_dissector_key_tags *key_tags;
>  	struct flow_dissector_key_vlan *key_vlan;
>  	enum flow_dissect_ret fdret;
> +	int num_eh, num_encaps = 0;
>  	bool skip_vlan = false;
>  	u8 ip_proto = 0;
>  	bool ret;
> @@ -714,7 +744,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>  	case FLOW_DISSECT_RET_OUT_GOOD:
>  		goto out_good;
>  	case FLOW_DISSECT_RET_PROTO_AGAIN:
> -		goto proto_again;
> +		if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
> +			goto proto_again;

I think you should move the check to the proto_again label. In case you
loop too often you can `goto out_good'.

[...]

Bye,
Hannes


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-09-01 13:32   ` Hannes Frederic Sowa
@ 2017-09-01 15:38     ` Tom Herbert
  2017-09-01 16:35       ` Hannes Frederic Sowa
  0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2017-09-01 15:38 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David S . Miller, Linux Kernel Network Developers, alex.popov

On Fri, Sep 1, 2017 at 6:32 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Tom Herbert <tom@quantonium.net> writes:
>
>> In flow dissector there are no limits to the number of nested
>> encapsulations that might be dissected which makes for a nice DOS
>> attack. This patch limits for dissecting nested encapsulations
>> as well as for dissecting over extension headers.
>
> I was actually more referring to your patch, because the flow dissector
> right now is not stack recursive. Your changes would make it doing
> recursion on the stack.

I don't believe those patches had any recursion.

>> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> Signed-off-by: Tom Herbert <tom@quantonium.net>
>> ---
>>  net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 45 insertions(+), 3 deletions(-)
>>
>> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
>> index 5110180a3e96..1bca748de27d 100644
>> --- a/net/core/flow_dissector.c
>> +++ b/net/core/flow_dissector.c
>> @@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
>>       key_ip->ttl = iph->hop_limit;
>>  }
>>
>> +/* Maximum number of nested encapsulations that can be processed in
>> + * __skb_flow_dissect
>> + */
>> +#define MAX_FLOW_DISSECT_ENCAPS      5
>
> I think you can exactly parse one encapsulation layer safely. This is
> because you can only keep state on one encapsulation layer protocol
> right now.
>
> For example this scenario:
>
> I would like to circumvent tc flower rules that filter based on vlan. I
> simply construct a packet looking like:
>
> Ethernet|Vlan|IP|GRE|Ethernet|Vlan|
>
> because we don't recurse in the flow keys either, the second vlan header
> would overwrite the information of the first one.
>
Flow dissector returns the innermost information it sees. If only
information from the outermost headers is needed then the caller
should set FLOW_DISSECTOR_F_STOP_AT_ENCAP in the flags.
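
For illustration only, such a caller might look roughly like the sketch
below. It assumes the existing skb_flow_dissect_flow_keys() wrapper
(which zeroes a struct flow_keys and passes the flags through to
__skb_flow_dissect()) and the usual flow_dissector/skbuff includes; the
function name is hypothetical:

/* Hypothetical caller that only wants the outermost flow information. */
static void example_outer_flow(const struct sk_buff *skb)
{
	struct flow_keys keys;

	/* Dissect no deeper than the first encapsulation header. */
	if (!skb_flow_dissect_flow_keys(skb, &keys,
					FLOW_DISSECTOR_F_STOP_AT_ENCAP))
		return;

	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
		pr_debug("outer v4 flow %pI4 -> %pI4 proto %u\n",
			 &keys.addrs.v4addrs.src, &keys.addrs.v4addrs.dst,
			 keys.basic.ip_proto);
}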

>> +
>> +static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
>> +{
>> +     ++*num_encaps;
>> +
>> +     if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
>> +             if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
>> +                     /* Allow one more pass but ignore disregard
>> +                      * further encapsulations
>> +                      */
>> +                     *flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
>> +             } else {
>> +                     /* Max encaps reached */
>> +                     return  false;
>> +             }
>> +     }
>> +
>> +     return true;
>> +}
>> +
>> +/* Maximum number of extension headers can be processed in __skb_flow_dissect
>> + * per IPv6 packet
>> + */
>> +#define MAX_FLOW_DISSECT_EH  5
>
> I would at least allow each extension header once, DEST_OPS twice, thus
> let's say 12? It is safe in this version because it does not consume
> stack space anyway and doesn't update internal state.

This is per IPv6 header. This patch would allow up to five
encapsulations of IPv6/IPv6, so the maximum number of EHs we would
consider is 6*5=30.

>
>> +
>>  /**
>>   * __skb_flow_dissect - extract the flow_keys struct and return it
>>   * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
>> @@ -426,6 +455,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>       struct flow_dissector_key_tags *key_tags;
>>       struct flow_dissector_key_vlan *key_vlan;
>>       enum flow_dissect_ret fdret;
>> +     int num_eh, num_encaps = 0;
>>       bool skip_vlan = false;
>>       u8 ip_proto = 0;
>>       bool ret;
>> @@ -714,7 +744,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>       case FLOW_DISSECT_RET_OUT_GOOD:
>>               goto out_good;
>>       case FLOW_DISSECT_RET_PROTO_AGAIN:
>> -             goto proto_again;
>> +             if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
>> +                     goto proto_again;
>
> I think you should get the check to the proto_again label. In case you
> loop to often you can `goto out_good'.
>
That would add an extra conditional in the common case of no
encapsulation. Having it here means we only care about the
encapsulation limit when there is encapsulation.

Tom


* Re: [PATCH net-next 1/2] flow_dissector: Cleanup control flow
  2017-09-01 12:35   ` Hannes Frederic Sowa
@ 2017-09-01 16:12     ` Tom Herbert
  0 siblings, 0 replies; 12+ messages in thread
From: Tom Herbert @ 2017-09-01 16:12 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David S . Miller, Linux Kernel Network Developers, alex.popov

On Fri, Sep 1, 2017 at 5:35 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Tom Herbert <tom@quantonium.net> writes:
>
>> __skb_flow_dissect is riddled with gotos that make discerning the flow,
>> debugging, and extending the capability difficult. This patch
>> reorganizes things so that we only perform goto's after the two main
>> switch statements (no gotos within the cases now). It also eliminates
>> several goto labels so that there are only two labels that can be target
>> for goto.
>
> The problem with the
>
> fdret = ... ;
> break;
>
> is that we now have to count curly braces to look what break does
> actually break and where fdret is being processed. With the number of
> context lines you send for the patch this is hard to review.
>
> I tend to like the gotos a bit more for now.

This is a step towards a more modular design for flow dissector. The
goto's force a monolithic design and make it hard to implement new
functionality like trying to enforce limits on encapsulation, which
requires a single point for the logic. The ip, ipv6, and mpls labels were
really unnecessary to begin with; the proto_again logic works fine for
those. This is also a segue to breaking up the very large
__skb_flow_dissect function into more manageable components (IP
protocol dissection should be its own function, for instance). Follow-on
patches will allow protocol-specific implementations of flow
dissection to be located with the rest of the protocol implementation, so
hopefully we can end the practice of adding support for every
networking protocol in one single core function (analogous to how we
parse protocols in GRO).

Tom


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-09-01 15:38     ` Tom Herbert
@ 2017-09-01 16:35       ` Hannes Frederic Sowa
  2017-09-01 16:49         ` Tom Herbert
  0 siblings, 1 reply; 12+ messages in thread
From: Hannes Frederic Sowa @ 2017-09-01 16:35 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David S . Miller, Linux Kernel Network Developers, alex.popov

Hello Tom,

Tom Herbert <tom@quantonium.net> writes:

> On Fri, Sep 1, 2017 at 6:32 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>> Tom Herbert <tom@quantonium.net> writes:
>>
>>> In flow dissector there are no limits to the number of nested
>>> encapsulations that might be dissected which makes for a nice DOS
>>> attack. This patch limits for dissecting nested encapsulations
>>> as well as for dissecting over extension headers.
>>
>> I was actually more referring to your patch, because the flow dissector
>> right now is not stack recursive. Your changes would make it doing
>> recursion on the stack.
>
> I don't believe those patches had any recursion.

I was wrong about stack recursion; you handle that using the
FLOW_DISSECT_RET_PROTO_AGAIN return value, thus leaving the stack frame
again, sorry.

But otherwise the walk would be unlimited (based on the packet size) in
your first patchset, correct? See this malicious example:

| IP1 | UDP1 | VXLAN1 | Ethernet | IP2 | UDP2 | VXLAN2 | ...

where IP1 == IP2, UDP1 == UDP2 and VXLAN1 != VXLAN2?

Notice that because IP1 == IP2 and UDP1 == UDP2 it seems to me it would
hit the same socket again. We would be prone to overwrite vxlan id 1
with vxlan id 2 in the key thus the key would be malicious and traffic
could be injected into other tenant networks, if the encapsulated
packets within VXLAN1 could be generated by a malicious user?

I was actually not concerned about the "recursion" but merely about
updating the values to the innermost values.

>>> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>>> Signed-off-by: Tom Herbert <tom@quantonium.net>
>>> ---
>>>  net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
>>>  1 file changed, 45 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
>>> index 5110180a3e96..1bca748de27d 100644
>>> --- a/net/core/flow_dissector.c
>>> +++ b/net/core/flow_dissector.c
>>> @@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
>>>       key_ip->ttl = iph->hop_limit;
>>>  }
>>>
>>> +/* Maximum number of nested encapsulations that can be processed in
>>> + * __skb_flow_dissect
>>> + */
>>> +#define MAX_FLOW_DISSECT_ENCAPS      5
>>
>> I think you can exactly parse one encapsulation layer safely. This is
>> because you can only keep state on one encapsulation layer protocol
>> right now.
>>
>> For example this scenario:
>>
>> I would like to circumvent tc flower rules that filter based on vlan. I
>> simply construct a packet looking like:
>>
>> Ethernet|Vlan|IP|GRE|Ethernet|Vlan|
>>
>> because we don't recurse in the flow keys either, the second vlan header
>> would overwrite the information of the first one.
>>
> Flow dissector returns the inner most information it sees. If only
> information from the outermost headers is needed then the caller
> should set FLOW_DISSECTOR_F_STOP_AT_ENCAP in the flags.

Right now, flower does not do so and seems to be prone to be fooled like
that.

Additionally, flow_dissector does not update all keys to their innermost
values either; e.g. I don't see the inner packets updating
FLOW_DISSECTOR_KEY_ETH_ADDRS (e.g. with ETH_P_TEB).

To me it would make more sense to be able to specify
FLOW_DISSECTOR_F_AFTER_FIRST_ENCAP, so that we get the first
encapsulation information with the key, make the policy decision, pull
headers and recurse here.

>>> +
>>> +static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
>>> +{
>>> +     ++*num_encaps;
>>> +
>>> +     if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
>>> +             if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
>>> +                     /* Allow one more pass but ignore disregard
>>> +                      * further encapsulations
>>> +                      */
>>> +                     *flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
>>> +             } else {
>>> +                     /* Max encaps reached */
>>> +                     return  false;
>>> +             }
>>> +     }
>>> +
>>> +     return true;
>>> +}
>>> +
>>> +/* Maximum number of extension headers can be processed in __skb_flow_dissect
>>> + * per IPv6 packet
>>> + */
>>> +#define MAX_FLOW_DISSECT_EH  5
>>
>> I would at least allow each extension header once, DEST_OPS twice, thus
>> let's say 12? It is safe in this version because it does not consume
>> stack space anyway and doesn't update internal state.
>
> This per each IPv6 header. This patch would allow up to five
> encapsulations of IPv6/IPv6 so maximum number of EHs we would consider
> is 6*5=30.

I would still increase the limit, but also no hard feelings. The reason
I came up with 12 is that anyway each header (besides DST_OPS) is only
allowed to show up once per header (11 + 1).

I am fine that this is a per-IPv6 header limit.

>>
>>> +
>>>  /**
>>>   * __skb_flow_dissect - extract the flow_keys struct and return it
>>>   * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
>>> @@ -426,6 +455,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>>       struct flow_dissector_key_tags *key_tags;
>>>       struct flow_dissector_key_vlan *key_vlan;
>>>       enum flow_dissect_ret fdret;
>>> +     int num_eh, num_encaps = 0;
>>>       bool skip_vlan = false;
>>>       u8 ip_proto = 0;
>>>       bool ret;
>>> @@ -714,7 +744,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>>       case FLOW_DISSECT_RET_OUT_GOOD:
>>>               goto out_good;
>>>       case FLOW_DISSECT_RET_PROTO_AGAIN:
>>> -             goto proto_again;
>>> +             if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
>>> +                     goto proto_again;
>>
>> I think you should get the check to the proto_again label. In case you
>> loop to often you can `goto out_good'.
>>
> That would add an extra conditional in the common case of no
> encapsulation. Having it here means we only care about the
> encapsulation limit when there is encapsulation.

In my opinion it would make the limits easier to grasp, but no hard
feelings about that.

Thanks,
Hannes


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-09-01 16:35       ` Hannes Frederic Sowa
@ 2017-09-01 16:49         ` Tom Herbert
  2017-09-01 17:05           ` Hannes Frederic Sowa
  0 siblings, 1 reply; 12+ messages in thread
From: Tom Herbert @ 2017-09-01 16:49 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, David S . Miller, Linux Kernel Network Developers,
	alex.popov

On Fri, Sep 1, 2017 at 9:35 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Hello Tom,
>
> Tom Herbert <tom@quantonium.net> writes:
>
>> On Fri, Sep 1, 2017 at 6:32 AM, Hannes Frederic Sowa
>> <hannes@stressinduktion.org> wrote:
>>> Tom Herbert <tom@quantonium.net> writes:
>>>
>>>> In flow dissector there are no limits to the number of nested
>>>> encapsulations that might be dissected which makes for a nice DOS
>>>> attack. This patch limits for dissecting nested encapsulations
>>>> as well as for dissecting over extension headers.
>>>
>>> I was actually more referring to your patch, because the flow dissector
>>> right now is not stack recursive. Your changes would make it doing
>>> recursion on the stack.
>>
>> I don't believe those patches had any recursion.
>
> I was wrong with stack recursion, you handle that using the
> FLOW_DISSECT_RET_PROTO_AGAIN return value thus leaving the stack frame
> again, sorry.
>
> But otherwise the walk would be unlimited (based on the packet size) in
> your first patchset, correct? See this malicious example:
>
> | IP1 | UDP1 | VXLAN1 | Ethernet | IP2 | UDP2 | VXLAN2 | ...
>
Without the limits patch I subsequently proposed, yes. However, this
is true for all the other encapsulations anyway; there is nothing
unique about UDP encapsulations in this regard (hence the limit
patch should generally apply to all encapsulations).

> where IP1 == IP2, UDP1 == UDP2 and VXLAN1 != VXLAN2?
>
> Notice that because IP1 == IP2 and UDP1 == UDP2 it seems to me it would
> hit the same socket again. We would be prone to overwrite vxlan id 1
> with vxlan id 2 in the key thus the key would be malicious and traffic
> could be injected into other tenant networks, if the encapsulated
> packets within VXLAN1 could be generated by a malicious user?
>
This is why flow dissection is not an authoritative parsing of the
packet. It can be wrong or misleading because it doesn't have all the
context, doesn't necessarily parse the whole chain, and only returns
one set of information (for instance only one pair of IP addresses
when there may be more in a packet). It's just a best effort mechanism
that is great for computing a hash for instance. If someone is
steering a packet to a VM based on the output of flow dissector that
is a bug; the only correct way to do this is to go through the normal
receive protocol processing path.
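
As a sketch of that kind of consumer (assuming the in-tree
skb_flow_dissect_flow_keys() and flow_hash_from_keys() helpers; the
function name is hypothetical and this is not part of the patch set):

/* Best-effort flow hash: fine for steering onto queues or CPUs,
 * but not an authoritative parse of the packet.
 */
static u32 example_flow_hash(const struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
		return 0;

	return flow_hash_from_keys(&keys);
}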

> I was actually not concerned about the "recursion" but merely about
> updating the values to the innermost values.
>
See my previous comment about use STOP_AT_ENCAP.

Tom

>>>> Reported-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>>>> Signed-off-by: Tom Herbert <tom@quantonium.net>
>>>> ---
>>>>  net/core/flow_dissector.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---
>>>>  1 file changed, 45 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
>>>> index 5110180a3e96..1bca748de27d 100644
>>>> --- a/net/core/flow_dissector.c
>>>> +++ b/net/core/flow_dissector.c
>>>> @@ -396,6 +396,35 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
>>>>       key_ip->ttl = iph->hop_limit;
>>>>  }
>>>>
>>>> +/* Maximum number of nested encapsulations that can be processed in
>>>> + * __skb_flow_dissect
>>>> + */
>>>> +#define MAX_FLOW_DISSECT_ENCAPS      5
>>>
>>> I think you can exactly parse one encapsulation layer safely. This is
>>> because you can only keep state on one encapsulation layer protocol
>>> right now.
>>>
>>> For example this scenario:
>>>
>>> I would like to circumvent tc flower rules that filter based on vlan. I
>>> simply construct a packet looking like:
>>>
>>> Ethernet|Vlan|IP|GRE|Ethernet|Vlan|
>>>
>>> because we don't recurse in the flow keys either, the second vlan header
>>> would overwrite the information of the first one.
>>>
>> Flow dissector returns the inner most information it sees. If only
>> information from the outermost headers is needed then the caller
>> should set FLOW_DISSECTOR_F_STOP_AT_ENCAP in the flags.
>
> Right now, flower does not do so and seems to be prone to be fooled like
> that.
>
> Additionally, flow_dissector does not update all keys to its inner most
> values also, e.g. I don't see the inner packets updating
> FLOW_DISSECTOR_KEY_ETH_ADDRS (e.g. with ETH_P_TEB).
>
> To me it would make more sense to be able to specificy
> FLOW_DISSECTOR_F_AFTER_FIRST_ENCAP, so that we get the first
> encapsulation information with the key, do policy decision, pull headers
> and recurse here.
>
>>>> +
>>>> +static bool skb_flow_dissect_encap_allowed(int *num_encaps, unsigned int *flags)
>>>> +{
>>>> +     ++*num_encaps;
>>>> +
>>>> +     if (*num_encaps >= MAX_FLOW_DISSECT_ENCAPS) {
>>>> +             if (*num_encaps == MAX_FLOW_DISSECT_ENCAPS) {
>>>> +                     /* Allow one more pass but ignore disregard
>>>> +                      * further encapsulations
>>>> +                      */
>>>> +                     *flags |= FLOW_DISSECTOR_F_STOP_AT_ENCAP;
>>>> +             } else {
>>>> +                     /* Max encaps reached */
>>>> +                     return  false;
>>>> +             }
>>>> +     }
>>>> +
>>>> +     return true;
>>>> +}
>>>> +
>>>> +/* Maximum number of extension headers can be processed in __skb_flow_dissect
>>>> + * per IPv6 packet
>>>> + */
>>>> +#define MAX_FLOW_DISSECT_EH  5
>>>
>>> I would at least allow each extension header once, DEST_OPS twice, thus
>>> let's say 12? It is safe in this version because it does not consume
>>> stack space anyway and doesn't update internal state.
>>
>> This per each IPv6 header. This patch would allow up to five
>> encapsulations of IPv6/IPv6 so maximum number of EHs we would consider
>> is 6*5=30.
>
> I would still increase the limit, but also no hard feelings. The reason
> I came up with 12 is that anyway each header (besides DST_OPS) is only
> allowed to show up once per header (11 + 1).
>
> I am fine that this is a per-IPv6 header limit.
>
>>>
>>>> +
>>>>  /**
>>>>   * __skb_flow_dissect - extract the flow_keys struct and return it
>>>>   * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
>>>> @@ -426,6 +455,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>>>       struct flow_dissector_key_tags *key_tags;
>>>>       struct flow_dissector_key_vlan *key_vlan;
>>>>       enum flow_dissect_ret fdret;
>>>> +     int num_eh, num_encaps = 0;
>>>>       bool skip_vlan = false;
>>>>       u8 ip_proto = 0;
>>>>       bool ret;
>>>> @@ -714,7 +744,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
>>>>       case FLOW_DISSECT_RET_OUT_GOOD:
>>>>               goto out_good;
>>>>       case FLOW_DISSECT_RET_PROTO_AGAIN:
>>>> -             goto proto_again;
>>>> +             if (skb_flow_dissect_encap_allowed(&num_encaps, &flags))
>>>> +                     goto proto_again;
>>>
>>> I think you should get the check to the proto_again label. In case you
>>> loop to often you can `goto out_good'.
>>>
>> That would add an extra conditional in the common case of no
>> encapsulation. Having it here means we only care about the
>> encapsulation limit when there is encapsulation.
>
> In my opinion it would make the limits easier to grasp, but no hard
> feelings about that.
>
> Thanks,
> Hannes


* Re: [PATCH net-next 2/2] flow_dissector: Add limits for encapsulation and EH
  2017-09-01 16:49         ` Tom Herbert
@ 2017-09-01 17:05           ` Hannes Frederic Sowa
  0 siblings, 0 replies; 12+ messages in thread
From: Hannes Frederic Sowa @ 2017-09-01 17:05 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Tom Herbert, David S . Miller, Linux Kernel Network Developers,
	alex.popov

Tom Herbert <tom@herbertland.com> writes:

> On Fri, Sep 1, 2017 at 9:35 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>> Hello Tom,
>>
>> Tom Herbert <tom@quantonium.net> writes:
>>
>>> On Fri, Sep 1, 2017 at 6:32 AM, Hannes Frederic Sowa
>>> <hannes@stressinduktion.org> wrote:
>>>> Tom Herbert <tom@quantonium.net> writes:
>>>>
>>>>> In flow dissector there are no limits to the number of nested
>>>>> encapsulations that might be dissected which makes for a nice DOS
>>>>> attack. This patch limits for dissecting nested encapsulations
>>>>> as well as for dissecting over extension headers.
>>>>
>>>> I was actually more referring to your patch, because the flow dissector
>>>> right now is not stack recursive. Your changes would make it doing
>>>> recursion on the stack.
>>>
>>> I don't believe those patches had any recursion.
>>
>> I was wrong with stack recursion, you handle that using the
>> FLOW_DISSECT_RET_PROTO_AGAIN return value thus leaving the stack frame
>> again, sorry.
>>
>> But otherwise the walk would be unlimited (based on the packet size) in
>> your first patchset, correct? See this malicious example:
>>
>> | IP1 | UDP1 | VXLAN1 | Ethernet | IP2 | UDP2 | VXLAN2 | ...
>>
> Without the limits patch I subsequently proposed, yes. However, this
> is true for all the other encapsulations anyway; there's is nothing
> unique about UDP encapsulations in this regard (hence with the limit
> patch should generally apply to all encapsulations).

I used this example to show its possible security implications. Other
encaps definitely have the same problems.

>> where IP1 == IP2, UDP1 == UDP2 and VXLAN1 != VXLAN2?
>>
>> Notice that because IP1 == IP2 and UDP1 == UDP2 it seems to me it would
>> hit the same socket again. We would be prone to overwrite vxlan id 1
>> with vxlan id 2 in the key thus the key would be malicious and traffic
>> could be injected into other tenant networks, if the encapsulated
>> packets within VXLAN1 could be generated by a malicious user?
>>
> This is why flow dissection is not an authoritative parsing of the
> packet. It can be wrong or misleading because it doesn't have all the
> context, doesn't necessarily parse the whole chain, and only returns
> one set of information (for instance only one pair of IP addresses
> when there may be more in a packet). It's just a best effort mechanism
> that is great for computing a hash for instance. If someone is
> steering a packet to a VM based on the output of flow dissector that
> is a bug; the only correct way to do this is to go through the normal
> receive protocol processing path.

I think it must be agreed upon what flow dissector is. Especially I am
concerned about its use in cls_flower (or vice versa this patch).

>> I was actually not concerned about the "recursion" but merely about
>> updating the values to the innermost values.
>>
> See my previous comment about use STOP_AT_ENCAP.

For an authoritative parser, for which it gets used in flower right now,
STOP_AT_ENCAP might not make too much sense, because it might want to
look at one additional level of encapsulation information. But if you
don't consider the dissector to be an authoritative parser for packets, it
would be okay.

Btw., I fear this recursion problem exists right now also with flower's
use of lwt.

[...]

Thanks,
Hannes


