* [RFC net-next 1/1] net: sched: protect against loops in TC filter hooks
@ 2019-05-24 16:05 John Hurley
  2019-05-24 16:23 ` Stephen Hemminger
  2019-05-24 18:32 ` Daniel Borkmann
  0 siblings, 2 replies; 5+ messages in thread
From: John Hurley @ 2019-05-24 16:05 UTC (permalink / raw)
  To: netdev, jiri, davem, xiyou.wangcong
  Cc: simon.horman, jakub.kicinski, oss-drivers, John Hurley

TC hooks allow filters and actions to be applied to packets at both
ingress and egress of the network stack. With a poor configuration it is
possible to create loops whereby an ingress hook calls a mirred egress
redirect action towards a device whose egress hook in turn redirects the
packet back to the first ingress, and so on. The TC core classifier
protects against loops when reclassifying but, as yet, there is no
protection against a packet looping between multiple hooks. This can
lead to stack overflow panics.
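
For example, a misconfiguration along the following lines could create
such a loop (illustrative only; the device names are placeholders and
not taken from any report):

  tc qdisc add dev eth0 clsact
  tc qdisc add dev eth1 clsact
  # ingress on eth0 redirects the packet into eth1's egress path ...
  tc filter add dev eth0 ingress matchall \
      action mirred egress redirect dev eth1
  # ... whose egress hook redirects it back into eth0's ingress
  tc filter add dev eth1 egress matchall \
      action mirred ingress redirect dev eth0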

Add a per-CPU counter that tracks the recursion of packets through TC
hooks. The packet is dropped if the recursion limit is exceeded, and the
counter is reset for the next packet.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 net/core/dev.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index b6b8505..a6d9ed7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,6 +154,9 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
+#define SCH_RECURSION_LIMIT	4
+static DEFINE_PER_CPU(int, sch_recursion_level);
+
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
@@ -3598,16 +3601,42 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+static inline int sch_check_inc_recur_level(void)
+{
+	int rec_level = __this_cpu_inc_return(sch_recursion_level);
+
+	if (rec_level >= SCH_RECURSION_LIMIT) {
+		net_warn_ratelimited("Recursion limit reached on TC datapath, probable configuration error\n");
+		return -ELOOP;
+	}
+
+	return 0;
+}
+
+static inline void sch_dec_recur_level(void)
+{
+	__this_cpu_dec(sch_recursion_level);
+}
+
 #ifdef CONFIG_NET_EGRESS
 static struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
 	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
 	struct tcf_result cl_res;
+	int err;
 
 	if (!miniq)
 		return skb;
 
+	err = sch_check_inc_recur_level();
+	if (err) {
+		sch_dec_recur_level();
+		*ret = NET_XMIT_DROP;
+		consume_skb(skb);
+		return NULL;
+	}
+
 	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
 	mini_qdisc_bstats_cpu_update(miniq, skb);
 
@@ -3620,22 +3649,26 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 		mini_qdisc_qstats_cpu_drop(miniq);
 		*ret = NET_XMIT_DROP;
 		kfree_skb(skb);
-		return NULL;
+		skb = NULL;
+		break;
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		*ret = NET_XMIT_SUCCESS;
 		consume_skb(skb);
-		return NULL;
+		skb = NULL;
+		break;
 	case TC_ACT_REDIRECT:
 		/* No need to push/pop skb's mac_header here on egress! */
 		skb_do_redirect(skb);
 		*ret = NET_XMIT_SUCCESS;
-		return NULL;
+		skb = NULL;
+		break;
 	default:
 		break;
 	}
 
+	sch_dec_recur_level();
 	return skb;
 }
 #endif /* CONFIG_NET_EGRESS */
@@ -4670,6 +4703,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
 	struct tcf_result cl_res;
+	int err;
 
 	/* If there's at least one ingress present somewhere (so
 	 * we get here via enabled static key), remaining devices
@@ -4679,6 +4713,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	if (!miniq)
 		return skb;
 
+	err = sch_check_inc_recur_level();
+	if (err) {
+		sch_dec_recur_level();
+		*ret = NET_XMIT_DROP;
+		consume_skb(skb);
+		return NULL;
+	}
+
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
@@ -4696,12 +4738,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	case TC_ACT_SHOT:
 		mini_qdisc_qstats_cpu_drop(miniq);
 		kfree_skb(skb);
-		return NULL;
+		skb = NULL;
+		break;
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		consume_skb(skb);
-		return NULL;
+		skb = NULL;
+		break;
 	case TC_ACT_REDIRECT:
 		/* skb_mac_header check was done by cls/act_bpf, so
 		 * we can safely push the L2 header back before
@@ -4709,14 +4753,18 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		 */
 		__skb_push(skb, skb->mac_len);
 		skb_do_redirect(skb);
-		return NULL;
+		skb = NULL;
+		break;
 	case TC_ACT_REINSERT:
 		/* this does not scrub the packet, and updates stats on error */
 		skb_tc_reinsert(skb, &cl_res);
-		return NULL;
+		skb = NULL;
+		break;
 	default:
 		break;
 	}
+
+	sch_dec_recur_level();
 #endif /* CONFIG_NET_CLS_ACT */
 	return skb;
 }
-- 
2.7.4

