* [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory
@ 2020-12-16 20:43 Felix Fietkau
  2020-12-16 20:43 ` [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing Felix Fietkau
                   ` (7 more replies)
  0 siblings, 8 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

This is similar to what sch_fq_codel does. It also amortizes the
worst-case cost of a follow-up patch that changes the selection of the
biggest flow for dropping packets.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/fq_impl.h | 55 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index e73d74d2fabf..06d2a79233c9 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -11,17 +11,25 @@
 
 /* functions that are embedded into includer */
 
+
+static void
+__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets,
+		    unsigned int bytes, unsigned int truesize)
+{
+	struct fq_tin *tin = flow->tin;
+
+	tin->backlog_bytes -= bytes;
+	tin->backlog_packets -= packets;
+	flow->backlog -= bytes;
+	fq->backlog -= packets;
+	fq->memory_usage -= truesize;
+}
+
 static void fq_adjust_removal(struct fq *fq,
 			      struct fq_flow *flow,
 			      struct sk_buff *skb)
 {
-	struct fq_tin *tin = flow->tin;
-
-	tin->backlog_bytes -= skb->len;
-	tin->backlog_packets--;
-	flow->backlog -= skb->len;
-	fq->backlog--;
-	fq->memory_usage -= skb->truesize;
+	__fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
 }
 
 static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
@@ -59,6 +67,34 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
 	return skb;
 }
 
+static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
+			fq_skb_free_t free_func)
+{
+	unsigned int packets = 0, bytes = 0, truesize = 0;
+	struct fq_tin *tin = flow->tin;
+	struct sk_buff *skb;
+	int pending;
+
+	lockdep_assert_held(&fq->lock);
+
+	pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
+	do {
+		skb = __skb_dequeue(&flow->queue);
+		if (!skb)
+			break;
+
+		packets++;
+		bytes += skb->len;
+		truesize += skb->truesize;
+		free_func(fq, tin, flow, skb);
+	} while (packets < pending);
+
+	__fq_adjust_removal(fq, flow, packets, bytes, truesize);
+	fq_rejigger_backlog(fq, flow);
+
+	return packets;
+}
+
 static struct sk_buff *fq_tin_dequeue(struct fq *fq,
 				      struct fq_tin *tin,
 				      fq_tin_dequeue_t dequeue_func)
@@ -190,12 +226,9 @@ static void fq_tin_enqueue(struct fq *fq,
 		if (!flow)
 			return;
 
-		skb = fq_flow_dequeue(fq, flow);
-		if (!skb)
+		if (!fq_flow_drop(fq, flow, free_func))
 			return;
 
-		free_func(fq, flow->tin, flow, skb);
-
 		flow->tin->overlimit++;
 		fq->overlimit++;
 		if (oom) {
-- 
2.28.0



* [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-17 11:54   ` Toke Høiland-Jørgensen
  2020-12-16 20:43 ` [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin Felix Fietkau
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

Depending on the source, a hardware calculated hash may not provide the
same level of collision resistance.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/mac80211/tx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6422da6690f7..f1c934f21d7e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3937,7 +3937,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 	if (local->ops->wake_tx_queue) {
 		u16 queue = __ieee80211_select_queue(sdata, sta, skb);
 		skb_set_queue_mapping(skb, queue);
-		skb_get_hash(skb);
+		if (!skb->sw_hash)
+			__skb_get_hash(skb);
 	}
 
 	if (sta) {
@@ -4191,7 +4192,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
 	if (local->ops->wake_tx_queue) {
 		u16 queue = __ieee80211_select_queue(sdata, sta, skb);
 		skb_set_queue_mapping(skb, queue);
-		skb_get_hash(skb);
+		if (!skb->sw_hash)
+			__skb_get_hash(skb);
 	}
 
 	if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning)) &&
-- 
2.28.0



* [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
  2020-12-16 20:43 ` [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-17 11:55   ` Toke Høiland-Jørgensen
  2020-12-16 20:43 ` [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows Felix Fietkau
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

Simplifies the code and prepares for a rework of scanning for flows on
overmemory drop.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/fq.h           |  1 +
 include/net/fq_impl.h      | 11 +++++------
 net/mac80211/ieee80211_i.h |  1 -
 net/mac80211/tx.c          | 22 ++++------------------
 4 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/include/net/fq.h b/include/net/fq.h
index e39f3f8d5f8a..5df100b77099 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -47,6 +47,7 @@ struct fq_flow {
 struct fq_tin {
 	struct list_head new_flows;
 	struct list_head old_flows;
+	struct fq_flow default_flow;
 	u32 backlog_bytes;
 	u32 backlog_packets;
 	u32 overlimit;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 06d2a79233c9..dd374c7f0fe5 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -151,8 +151,7 @@ static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
 
 static struct fq_flow *fq_flow_classify(struct fq *fq,
 					struct fq_tin *tin, u32 idx,
-					struct sk_buff *skb,
-					fq_flow_get_default_t get_default_func)
+					struct sk_buff *skb)
 {
 	struct fq_flow *flow;
 
@@ -160,7 +159,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq,
 
 	flow = &fq->flows[idx];
 	if (flow->tin && flow->tin != tin) {
-		flow = get_default_func(fq, tin, idx, skb);
+		flow = &tin->default_flow;
 		tin->collisions++;
 		fq->collisions++;
 	}
@@ -192,15 +191,14 @@ static void fq_recalc_backlog(struct fq *fq,
 static void fq_tin_enqueue(struct fq *fq,
 			   struct fq_tin *tin, u32 idx,
 			   struct sk_buff *skb,
-			   fq_skb_free_t free_func,
-			   fq_flow_get_default_t get_default_func)
+			   fq_skb_free_t free_func)
 {
 	struct fq_flow *flow;
 	bool oom;
 
 	lockdep_assert_held(&fq->lock);
 
-	flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
+	flow = fq_flow_classify(fq, tin, idx, skb);
 
 	flow->tin = tin;
 	flow->backlog += skb->len;
@@ -331,6 +329,7 @@ static void fq_tin_init(struct fq_tin *tin)
 {
 	INIT_LIST_HEAD(&tin->new_flows);
 	INIT_LIST_HEAD(&tin->old_flows);
+	fq_flow_init(&tin->default_flow);
 }
 
 static int fq_init(struct fq *fq, int flows_cnt)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8bf9c0e974d6..c0f6168fdeed 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -848,7 +848,6 @@ enum txq_info_flags {
  */
 struct txq_info {
 	struct fq_tin tin;
-	struct fq_flow def_flow;
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f1c934f21d7e..50b4e92fd766 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1309,7 +1309,7 @@ static struct sk_buff *codel_dequeue_func(struct codel_vars *cvars,
 	fq = &local->fq;
 
 	if (cvars == &txqi->def_cvars)
-		flow = &txqi->def_flow;
+		flow = &txqi->tin.default_flow;
 	else
 		flow = &fq->flows[cvars - local->cvars];
 
@@ -1352,7 +1352,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
 		cparams = &local->cparams;
 	}
 
-	if (flow == &txqi->def_flow)
+	if (flow == &tin->default_flow)
 		cvars = &txqi->def_cvars;
 	else
 		cvars = &local->cvars[flow - fq->flows];
@@ -1379,17 +1379,6 @@ static void fq_skb_free_func(struct fq *fq,
 	ieee80211_free_txskb(&local->hw, skb);
 }
 
-static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
-						struct fq_tin *tin,
-						int idx,
-						struct sk_buff *skb)
-{
-	struct txq_info *txqi;
-
-	txqi = container_of(tin, struct txq_info, tin);
-	return &txqi->def_flow;
-}
-
 static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 				  struct txq_info *txqi,
 				  struct sk_buff *skb)
@@ -1402,8 +1391,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 
 	spin_lock_bh(&fq->lock);
 	fq_tin_enqueue(fq, tin, flow_idx, skb,
-		       fq_skb_free_func,
-		       fq_flow_get_default_func);
+		       fq_skb_free_func);
 	spin_unlock_bh(&fq->lock);
 }
 
@@ -1446,7 +1434,6 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 			struct txq_info *txqi, int tid)
 {
 	fq_tin_init(&txqi->tin);
-	fq_flow_init(&txqi->def_flow);
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
@@ -3283,8 +3270,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	 */
 
 	tin = &txqi->tin;
-	flow = fq_flow_classify(fq, tin, flow_idx, skb,
-				fq_flow_get_default_func);
+	flow = fq_flow_classify(fq, tin, flow_idx, skb);
 	head = skb_peek_tail(&flow->queue);
 	if (!head || skb_is_gso(head))
 		goto out;
-- 
2.28.0



* [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
  2020-12-16 20:43 ` [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing Felix Fietkau
  2020-12-16 20:43 ` [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-16 20:59   ` Johannes Berg
  2020-12-16 20:43 ` [PATCH 5/7] mac80211: fix encryption key selection for 802.3 xmit Felix Fietkau
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

A sorted flow list is only needed to drop packets from the biggest flow when
hitting the overmemory condition.
By scanning flows only when needed, we can avoid paying the cost of
maintaining the list under normal conditions.
In order to avoid scanning lots of empty flows and touching too many cold
cache lines, a bitmap of flows with backlog is maintained.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/fq.h      |  10 ++--
 include/net/fq_impl.h | 127 +++++++++++++++++++++++++++---------------
 net/mac80211/tx.c     |   2 -
 3 files changed, 85 insertions(+), 54 deletions(-)

diff --git a/include/net/fq.h b/include/net/fq.h
index 5df100b77099..4c6efb1e8952 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -19,8 +19,6 @@ struct fq_tin;
  * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
  *	(deficit round robin) based round robin queuing similar to the one
  *	found in net/sched/sch_fq_codel.c
- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
- *	fat flows and efficient head-dropping if packet limit is reached
  * @queue: sk_buff queue to hold packets
  * @backlog: number of bytes pending in the queue. The number of packets can be
  *	found in @queue.qlen
@@ -29,7 +27,6 @@ struct fq_tin;
 struct fq_flow {
 	struct fq_tin *tin;
 	struct list_head flowchain;
-	struct list_head backlogchain;
 	struct sk_buff_head queue;
 	u32 backlog;
 	int deficit;
@@ -47,6 +44,7 @@ struct fq_flow {
 struct fq_tin {
 	struct list_head new_flows;
 	struct list_head old_flows;
+	struct list_head tin_list;
 	struct fq_flow default_flow;
 	u32 backlog_bytes;
 	u32 backlog_packets;
@@ -60,14 +58,14 @@ struct fq_tin {
 /**
  * struct fq - main container for fair queuing purposes
  *
- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
- *	head-dropping when @backlog reaches @limit
  * @limit: max number of packets that can be queued across all flows
  * @backlog: number of packets queued across all flows
  */
 struct fq {
 	struct fq_flow *flows;
-	struct list_head backlogs;
+	u32 *flows_bitmap;
+
+	struct list_head tin_backlog;
 	spinlock_t lock;
 	u32 flows_cnt;
 	u32 limit;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index dd374c7f0fe5..c3e053dce0f2 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets,
 		    unsigned int bytes, unsigned int truesize)
 {
 	struct fq_tin *tin = flow->tin;
+	int idx;
 
 	tin->backlog_bytes -= bytes;
 	tin->backlog_packets -= packets;
 	flow->backlog -= bytes;
 	fq->backlog -= packets;
 	fq->memory_usage -= truesize;
+
+	if (flow->backlog)
+		return;
+
+	if (flow == &tin->default_flow) {
+		list_del_init(&tin->tin_list);
+		return;
+	}
+
+	idx = flow - fq->flows;
+	fq->flows_bitmap[idx / 32] &= ~BIT(idx % 32);
 }
 
 static void fq_adjust_removal(struct fq *fq,
@@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq *fq,
 	__fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
 }
 
-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
-{
-	struct fq_flow *i;
-
-	if (flow->backlog == 0) {
-		list_del_init(&flow->backlogchain);
-	} else {
-		i = flow;
-
-		list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
-			if (i->backlog < flow->backlog)
-				break;
-
-		list_move_tail(&flow->backlogchain,
-			       &i->backlogchain);
-	}
-}
-
 static struct sk_buff *fq_flow_dequeue(struct fq *fq,
 				       struct fq_flow *flow)
 {
@@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
 		return NULL;
 
 	fq_adjust_removal(fq, flow, skb);
-	fq_rejigger_backlog(fq, flow);
 
 	return skb;
 }
@@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
 	} while (packets < pending);
 
 	__fq_adjust_removal(fq, flow, packets, bytes, truesize);
-	fq_rejigger_backlog(fq, flow);
 
 	return packets;
 }
@@ -170,22 +162,50 @@ static struct fq_flow *fq_flow_classify(struct fq *fq,
 	return flow;
 }
 
-static void fq_recalc_backlog(struct fq *fq,
-			      struct fq_tin *tin,
-			      struct fq_flow *flow)
+static struct fq_flow *fq_find_fattest_flow(struct fq *fq)
 {
-	struct fq_flow *i;
+	struct fq_tin *tin;
+	struct fq_flow *flow = NULL;
+	u32 len = 0;
+	int base;
+
+	for (base = 0; base < fq->flows_cnt; base += 32) {
+		u32 mask = fq->flows_bitmap[base / 32];
+		unsigned int cur_len;
+		u8 i = 0;
+
+		while (mask) {
+			struct fq_flow *cur;
+			int first;
+
+			first = ffs(mask);
+			if (first < 32)
+				mask >>= first;
+			else
+				mask = 0;
+			i += first;
+
+			cur = &fq->flows[base + i - 1];
+			cur_len = cur->backlog;
+			if (cur_len <= len)
+				continue;
+
+			flow = cur;
+			len = cur_len;
+		}
+	}
 
-	if (list_empty(&flow->backlogchain))
-		list_add_tail(&flow->backlogchain, &fq->backlogs);
+	list_for_each_entry(tin, &fq->tin_backlog, tin_list) {
+		unsigned int cur_len = tin->default_flow.backlog;
 
-	i = flow;
-	list_for_each_entry_continue_reverse(i, &fq->backlogs,
-					     backlogchain)
-		if (i->backlog > flow->backlog)
-			break;
+		if (cur_len <= len)
+			continue;
 
-	list_move(&flow->backlogchain, &i->backlogchain);
+		flow = &tin->default_flow;
+		len = cur_len;
+	}
+
+	return flow;
 }
 
 static void fq_tin_enqueue(struct fq *fq,
@@ -200,6 +220,13 @@ static void fq_tin_enqueue(struct fq *fq,
 
 	flow = fq_flow_classify(fq, tin, idx, skb);
 
+	if (!flow->backlog) {
+		if (flow != &tin->default_flow)
+			fq->flows_bitmap[idx / 32] |= BIT(idx % 32);
+		else if (list_empty(&tin->tin_list))
+			list_add(&tin->tin_list, &fq->tin_backlog);
+	}
+
 	flow->tin = tin;
 	flow->backlog += skb->len;
 	tin->backlog_bytes += skb->len;
@@ -207,8 +234,6 @@ static void fq_tin_enqueue(struct fq *fq,
 	fq->memory_usage += skb->truesize;
 	fq->backlog++;
 
-	fq_recalc_backlog(fq, tin, flow);
-
 	if (list_empty(&flow->flowchain)) {
 		flow->deficit = fq->quantum;
 		list_add_tail(&flow->flowchain,
@@ -218,9 +243,7 @@ static void fq_tin_enqueue(struct fq *fq,
 	__skb_queue_tail(&flow->queue, skb);
 	oom = (fq->memory_usage > fq->memory_limit);
 	while (fq->backlog > fq->limit || oom) {
-		flow = list_first_entry_or_null(&fq->backlogs,
-						struct fq_flow,
-						backlogchain);
+		flow = fq_find_fattest_flow(fq);
 		if (!flow)
 			return;
 
@@ -255,8 +278,6 @@ static void fq_flow_filter(struct fq *fq,
 		fq_adjust_removal(fq, flow, skb);
 		free_func(fq, tin, flow, skb);
 	}
-
-	fq_rejigger_backlog(fq, flow);
 }
 
 static void fq_tin_filter(struct fq *fq,
@@ -279,16 +300,18 @@ static void fq_flow_reset(struct fq *fq,
 			  struct fq_flow *flow,
 			  fq_skb_free_t free_func)
 {
+	struct fq_tin *tin = flow->tin;
 	struct sk_buff *skb;
 
 	while ((skb = fq_flow_dequeue(fq, flow)))
-		free_func(fq, flow->tin, flow, skb);
+		free_func(fq, tin, flow, skb);
 
-	if (!list_empty(&flow->flowchain))
+	if (!list_empty(&flow->flowchain)) {
 		list_del_init(&flow->flowchain);
-
-	if (!list_empty(&flow->backlogchain))
-		list_del_init(&flow->backlogchain);
+		if (list_empty(&tin->new_flows) &&
+		    list_empty(&tin->old_flows))
+			list_del_init(&tin->tin_list);
+	}
 
 	flow->tin = NULL;
 
@@ -314,6 +337,7 @@ static void fq_tin_reset(struct fq *fq,
 		fq_flow_reset(fq, flow, free_func);
 	}
 
+	WARN_ON_ONCE(!list_empty(&tin->tin_list));
 	WARN_ON_ONCE(tin->backlog_bytes);
 	WARN_ON_ONCE(tin->backlog_packets);
 }
@@ -321,7 +345,6 @@ static void fq_tin_reset(struct fq *fq,
 static void fq_flow_init(struct fq_flow *flow)
 {
 	INIT_LIST_HEAD(&flow->flowchain);
-	INIT_LIST_HEAD(&flow->backlogchain);
 	__skb_queue_head_init(&flow->queue);
 }
 
@@ -329,6 +352,7 @@ static void fq_tin_init(struct fq_tin *tin)
 {
 	INIT_LIST_HEAD(&tin->new_flows);
 	INIT_LIST_HEAD(&tin->old_flows);
+	INIT_LIST_HEAD(&tin->tin_list);
 	fq_flow_init(&tin->default_flow);
 }
 
@@ -337,8 +361,8 @@ static int fq_init(struct fq *fq, int flows_cnt)
 	int i;
 
 	memset(fq, 0, sizeof(fq[0]));
-	INIT_LIST_HEAD(&fq->backlogs);
 	spin_lock_init(&fq->lock);
+	INIT_LIST_HEAD(&fq->tin_backlog);
 	fq->flows_cnt = max_t(u32, flows_cnt, 1);
 	fq->quantum = 300;
 	fq->limit = 8192;
@@ -348,6 +372,14 @@ static int fq_init(struct fq *fq, int flows_cnt)
 	if (!fq->flows)
 		return -ENOMEM;
 
+	fq->flows_bitmap = kcalloc(DIV_ROUND_UP(fq->flows_cnt, 32), sizeof(u32),
+				   GFP_KERNEL);
+	if (!fq->flows_bitmap) {
+		kvfree(fq->flows);
+		fq->flows = NULL;
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < fq->flows_cnt; i++)
 		fq_flow_init(&fq->flows[i]);
 
@@ -364,6 +396,9 @@ static void fq_reset(struct fq *fq,
 
 	kvfree(fq->flows);
 	fq->flows = NULL;
+
+	kfree(fq->flows_bitmap);
+	fq->flows_bitmap = NULL;
 }
 
 #endif
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 50b4e92fd766..cb3c2d6334b3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3337,8 +3337,6 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (head->len != orig_len) {
 		flow->backlog += head->len - orig_len;
 		tin->backlog_bytes += head->len - orig_len;
-
-		fq_recalc_backlog(fq, tin, flow);
 	}
 out:
 	spin_unlock_bh(&fq->lock);
-- 
2.28.0



* [PATCH 5/7] mac80211: fix encryption key selection for 802.3 xmit
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
                   ` (2 preceding siblings ...)
  2020-12-16 20:43 ` [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-16 20:43 ` [PATCH 6/7] mac80211: fix fast-rx encryption check Felix Fietkau
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

When using WEP, the default unicast key needs to be selected, instead of
the STA PTK.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/mac80211/tx.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cb3c2d6334b3..3eb9604e316f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4237,7 +4237,6 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
 	struct ethhdr *ehdr = (struct ethhdr *)skb->data;
 	struct ieee80211_key *key;
 	struct sta_info *sta;
-	bool offload = true;
 
 	if (unlikely(skb->len < ETH_HLEN)) {
 		kfree_skb(skb);
@@ -4253,18 +4252,22 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
 
 	if (unlikely(IS_ERR_OR_NULL(sta) || !sta->uploaded ||
 	    !test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
-		sdata->control_port_protocol == ehdr->h_proto))
-		offload = false;
-	else if ((key = rcu_dereference(sta->ptk[sta->ptk_idx])) &&
-		 (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
-		  key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
-		offload = false;
-
-	if (offload)
-		ieee80211_8023_xmit(sdata, dev, sta, key, skb);
-	else
-		ieee80211_subif_start_xmit(skb, dev);
+	    sdata->control_port_protocol == ehdr->h_proto))
+		goto skip_offload;
+
+	key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+	if (!key)
+		key = rcu_dereference(sdata->default_unicast_key);
+
+	if (key && (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
+		    key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
+		goto skip_offload;
+
+	ieee80211_8023_xmit(sdata, dev, sta, key, skb);
+	goto out;
 
+skip_offload:
+	ieee80211_subif_start_xmit(skb, dev);
 out:
 	rcu_read_unlock();
 
-- 
2.28.0



* [PATCH 6/7] mac80211: fix fast-rx encryption check
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
                   ` (3 preceding siblings ...)
  2020-12-16 20:43 ` [PATCH 5/7] mac80211: fix encryption key selection for 802.3 xmit Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-16 20:43 ` [PATCH 7/7] mac80211: add rx decapsulation offload support Felix Fietkau
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

When using WEP, the default unicast key needs to be selected, instead of
the STA PTK.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/mac80211/rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 13b9bcc4865d..972895e9f22d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4176,6 +4176,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 
 	rcu_read_lock();
 	key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+	if (!key)
+		key = rcu_dereference(sdata->default_unicast_key);
 	if (key) {
 		switch (key->conf.cipher) {
 		case WLAN_CIPHER_SUITE_TKIP:
-- 
2.28.0



* [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
                   ` (4 preceding siblings ...)
  2020-12-16 20:43 ` [PATCH 6/7] mac80211: fix fast-rx encryption check Felix Fietkau
@ 2020-12-16 20:43 ` Felix Fietkau
  2020-12-16 21:03   ` Johannes Berg
  2020-12-16 21:04   ` Johannes Berg
  2020-12-16 20:54 ` [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Johannes Berg
  2020-12-17 11:51 ` Toke Høiland-Jørgensen
  7 siblings, 2 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 20:43 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

This allows drivers to pass 802.3 frames to mac80211, with some restrictions:

- the skb must be passed with a valid sta
- fast-rx needs to be active for the sta
- monitor mode needs to be disabled

mac80211 will tell the driver when it is safe to enable rx decap offload for
a particular station.

In order to implement support, a driver must:

- call ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD)
- implement ops->sta_set_decap_offload
- mark 802.3 frames with RX_FLAG_8023

If a driver doesn't want to enable offload for some vif types, it can mask out
IEEE80211_OFFLOAD_DECAP_ENABLED in vif->offload_flags from within the
.add_interface or .update_vif_offload driver ops.
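
For illustration, a minimal driver-side sketch of these steps (the
"example_" names are made up and error handling is omitted; only the
mac80211 symbols come from this patch):

#include <net/mac80211.h>

static void example_sta_set_decap_offload(struct ieee80211_hw *hw,
                                          struct ieee80211_vif *vif,
                                          struct ieee80211_sta *sta,
                                          bool enabled)
{
        /* program the hardware/firmware to deliver (or stop delivering)
         * this station's rx frames as 802.3 frames */
}

static const struct ieee80211_ops example_ops = {
        /* other mandatory callbacks omitted */
        .sta_set_decap_offload = example_sta_set_decap_offload,
};

static void example_setup(struct ieee80211_hw *hw)
{
        ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD);
}

static void example_rx_decapped(struct ieee80211_hw *hw,
                                struct ieee80211_sta *pubsta,
                                struct sk_buff *skb)
{
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);

        /* frame was already converted to 802.3 by the hardware; per the
         * restrictions above it must be passed with a valid sta */
        status->flag |= RX_FLAG_8023;
        ieee80211_rx_napi(hw, pubsta, skb, NULL);
}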

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/mac80211.h     |  16 +++
 net/mac80211/debugfs.c     |   1 +
 net/mac80211/debugfs_sta.c |   1 +
 net/mac80211/driver-ops.h  |  16 +++
 net/mac80211/iface.c       |  11 ++
 net/mac80211/rx.c          | 243 ++++++++++++++++++++++++-------------
 net/mac80211/sta_info.h    |   2 +
 net/mac80211/trace.h       |  18 ++-
 8 files changed, 222 insertions(+), 86 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d315740581f1..296d39e93a13 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1296,6 +1296,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	the "0-length PSDU" field included there.  The value for it is
  *	in &struct ieee80211_rx_status.  Note that if this value isn't
  *	known the frame shouldn't be reported.
+ * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by
+ *	hardware or driver)
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1328,6 +1330,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_RADIOTAP_HE_MU		= BIT(27),
 	RX_FLAG_RADIOTAP_LSIG		= BIT(28),
 	RX_FLAG_NO_PSDU			= BIT(29),
+	RX_FLAG_8023			= BIT(30),
 };
 
 /**
@@ -1649,11 +1652,15 @@ enum ieee80211_vif_flags {
  *	The driver supports sending frames passed as 802.3 frames by mac80211.
  *	It must also support sending 802.11 packets for the same interface.
  * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload
+ * @IEEE80211_OFFLOAD_DECAP_ENABLED: rx encapsulation offload is enabled
+ *	The driver supports passing received 802.11 frames as 802.3 frames to
+ *	mac80211.
  */
 
 enum ieee80211_offload_flags {
 	IEEE80211_OFFLOAD_ENCAP_ENABLED		= BIT(0),
 	IEEE80211_OFFLOAD_ENCAP_4ADDR		= BIT(1),
+	IEEE80211_OFFLOAD_DECAP_ENABLED		= BIT(2),
 };
 
 /**
@@ -2389,6 +2396,9 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation
  *	offload
  *
+ * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation
+ *	offload
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2442,6 +2452,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
 	IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
 	IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD,
+	IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
@@ -3880,6 +3891,8 @@ enum ieee80211_reconfig_type {
  *	This callback may sleep.
  * @sta_set_4addr: Called to notify the driver when a station starts/stops using
  *	4-address mode
+ * @sta_set_decap_offload: Called to notify the driver when a station is allowed
+ *	to use rx decapsulation offload
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -4197,6 +4210,9 @@ struct ieee80211_ops {
 			      struct ieee80211_sta *sta, bool enabled);
 	int (*set_sar_specs)(struct ieee80211_hw *hw,
 			     const struct cfg80211_sar_specs *sar);
+	void (*sta_set_decap_offload)(struct ieee80211_hw *hw,
+				      struct ieee80211_vif *vif,
+				      struct ieee80211_sta *sta, bool enabled);
 };
 
 /**
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 48f144f107d5..60e5994e3ea3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -409,6 +409,7 @@ static const char *hw_flag_names[] = {
 	FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
 	FLAG(AMPDU_KEYBORDER_SUPPORT),
 	FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
+	FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
 #undef FLAG
 };
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index eb4bb79d936a..5a27c61a7b38 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -79,6 +79,7 @@ static const char * const sta_flag_names[] = {
 	FLAG(MPSP_RECIPIENT),
 	FLAG(PS_DELIVER),
 	FLAG(USES_ENCRYPTION),
+	FLAG(DECAP_OFFLOAD),
 #undef FLAG
 };
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index bcdfd19a596b..604ca59937f0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1413,4 +1413,20 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
+					     struct ieee80211_sub_if_data *sdata,
+					     struct ieee80211_sta *sta,
+					     bool enabled)
+{
+	sdata = get_bss_sdata(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_sta_set_decap_offload(local, sdata, sta, enabled);
+	if (local->ops->sta_set_decap_offload)
+		local->ops->sta_set_decap_offload(&local->hw, &sdata->vif, sta,
+						  enabled);
+	trace_drv_return_void(local);
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3b9ec4ef81c3..e59275015e38 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -798,10 +798,21 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
 		flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED;
 	}
 
+	if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) &&
+	    ieee80211_iftype_supports_encap_offload(sdata->vif.type)) {
+		flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;
+
+		if (local->monitors)
+			flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
+	} else {
+		flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
+	}
+
 	if (sdata->vif.offload_flags == flags)
 		return false;
 
 	sdata->vif.offload_flags = flags;
+	ieee80211_check_fast_rx_iface(sdata);
 	return true;
 }
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 972895e9f22d..c1343c028b76 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4095,7 +4095,9 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 		.vif_type = sdata->vif.type,
 		.control_port_protocol = sdata->control_port_protocol,
 	}, *old, *new = NULL;
+	bool set_offload = false;
 	bool assign = false;
+	bool offload;
 
 	/* use sparse to check that we don't return without updating */
 	__acquire(check_fast_rx);
@@ -4208,6 +4210,17 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 	if (assign)
 		new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL);
 
+	offload = assign &&
+		  (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
+
+	if (offload)
+		set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
+	else
+		set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
+
+	if (set_offload)
+		drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);
+
 	spin_lock_bh(&sta->lock);
 	old = rcu_dereference_protected(sta->fast_rx, true);
 	rcu_assign_pointer(sta->fast_rx, new);
@@ -4254,6 +4267,104 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
 	mutex_unlock(&local->sta_mtx);
 }
 
+static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
+			      struct ieee80211_fast_rx *fast_rx,
+			      int orig_len)
+{
+	struct ieee80211_sta_rx_stats *stats;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+	struct sta_info *sta = rx->sta;
+	struct sk_buff *skb = rx->skb;
+	void *sa = skb->data + ETH_ALEN;
+	void *da = skb->data;
+
+	stats = &sta->rx_stats;
+	if (fast_rx->uses_rss)
+		stats = this_cpu_ptr(sta->pcpu_rx_stats);
+
+	/* statistics part of ieee80211_rx_h_sta_process() */
+	if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
+		stats->last_signal = status->signal;
+		if (!fast_rx->uses_rss)
+			ewma_signal_add(&sta->rx_stats_avg.signal,
+					-status->signal);
+	}
+
+	if (status->chains) {
+		int i;
+
+		stats->chains = status->chains;
+		for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
+			int signal = status->chain_signal[i];
+
+			if (!(status->chains & BIT(i)))
+				continue;
+
+			stats->chain_signal_last[i] = signal;
+			if (!fast_rx->uses_rss)
+				ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+						-signal);
+		}
+	}
+	/* end of statistics */
+
+	stats->last_rx = jiffies;
+	stats->last_rate = sta_stats_encode_rate(status);
+
+	stats->fragments++;
+	stats->packets++;
+
+	skb->dev = fast_rx->dev;
+
+	dev_sw_netstats_rx_add(fast_rx->dev, skb->len);
+
+	/* The seqno index has the same property as needed
+	 * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
+	 * for non-QoS-data frames. Here we know it's a data
+	 * frame, so count MSDUs.
+	 */
+	u64_stats_update_begin(&stats->syncp);
+	stats->msdu[rx->seqno_idx]++;
+	stats->bytes += orig_len;
+	u64_stats_update_end(&stats->syncp);
+
+	if (fast_rx->internal_forward) {
+		struct sk_buff *xmit_skb = NULL;
+		if (is_multicast_ether_addr(da)) {
+			xmit_skb = skb_copy(skb, GFP_ATOMIC);
+		} else if (!ether_addr_equal(da, sa) &&
+			   sta_info_get(rx->sdata, da)) {
+			xmit_skb = skb;
+			skb = NULL;
+		}
+
+		if (xmit_skb) {
+			/*
+			 * Send to wireless media and increase priority by 256
+			 * to keep the received priority instead of
+			 * reclassifying the frame (see cfg80211_classify8021d).
+			 */
+			xmit_skb->priority += 256;
+			xmit_skb->protocol = htons(ETH_P_802_3);
+			skb_reset_network_header(xmit_skb);
+			skb_reset_mac_header(xmit_skb);
+			dev_queue_xmit(xmit_skb);
+		}
+
+		if (!skb)
+			return;
+	}
+
+	/* deliver to local stack */
+	skb->protocol = eth_type_trans(skb, fast_rx->dev);
+	memset(skb->cb, 0, sizeof(skb->cb));
+	if (rx->list)
+		list_add_tail(&skb->list, rx->list);
+	else
+		netif_receive_skb(skb);
+
+}
+
 static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 				     struct ieee80211_fast_rx *fast_rx)
 {
@@ -4274,9 +4385,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	} addrs __aligned(2);
 	struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
 
-	if (fast_rx->uses_rss)
-		stats = this_cpu_ptr(sta->pcpu_rx_stats);
-
 	/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
 	 * to a common data structure; drivers can implement that per queue
 	 * but we don't have that information in mac80211
@@ -4350,32 +4458,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	    pskb_trim(skb, skb->len - fast_rx->icv_len))
 		goto drop;
 
-	/* statistics part of ieee80211_rx_h_sta_process() */
-	if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
-		stats->last_signal = status->signal;
-		if (!fast_rx->uses_rss)
-			ewma_signal_add(&sta->rx_stats_avg.signal,
-					-status->signal);
-	}
-
-	if (status->chains) {
-		int i;
-
-		stats->chains = status->chains;
-		for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
-			int signal = status->chain_signal[i];
-
-			if (!(status->chains & BIT(i)))
-				continue;
-
-			stats->chain_signal_last[i] = signal;
-			if (!fast_rx->uses_rss)
-				ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
-						-signal);
-		}
-	}
-	/* end of statistics */
-
 	if (rx->key && !ieee80211_has_protected(hdr->frame_control))
 		goto drop;
 
@@ -4387,12 +4469,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 		return true;
 	}
 
-	stats->last_rx = jiffies;
-	stats->last_rate = sta_stats_encode_rate(status);
-
-	stats->fragments++;
-	stats->packets++;
-
 	/* do the header conversion - first grab the addresses */
 	ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
 	ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
@@ -4401,58 +4477,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	/* push the addresses in front */
 	memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs));
 
-	skb->dev = fast_rx->dev;
-
-	dev_sw_netstats_rx_add(fast_rx->dev, skb->len);
-
-	/* The seqno index has the same property as needed
-	 * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
-	 * for non-QoS-data frames. Here we know it's a data
-	 * frame, so count MSDUs.
-	 */
-	u64_stats_update_begin(&stats->syncp);
-	stats->msdu[rx->seqno_idx]++;
-	stats->bytes += orig_len;
-	u64_stats_update_end(&stats->syncp);
-
-	if (fast_rx->internal_forward) {
-		struct sk_buff *xmit_skb = NULL;
-		if (is_multicast_ether_addr(addrs.da)) {
-			xmit_skb = skb_copy(skb, GFP_ATOMIC);
-		} else if (!ether_addr_equal(addrs.da, addrs.sa) &&
-			   sta_info_get(rx->sdata, addrs.da)) {
-			xmit_skb = skb;
-			skb = NULL;
-		}
-
-		if (xmit_skb) {
-			/*
-			 * Send to wireless media and increase priority by 256
-			 * to keep the received priority instead of
-			 * reclassifying the frame (see cfg80211_classify8021d).
-			 */
-			xmit_skb->priority += 256;
-			xmit_skb->protocol = htons(ETH_P_802_3);
-			skb_reset_network_header(xmit_skb);
-			skb_reset_mac_header(xmit_skb);
-			dev_queue_xmit(xmit_skb);
-		}
-
-		if (!skb)
-			return true;
-	}
-
-	/* deliver to local stack */
-	skb->protocol = eth_type_trans(skb, fast_rx->dev);
-	memset(skb->cb, 0, sizeof(skb->cb));
-	if (rx->list)
-		list_add_tail(&skb->list, rx->list);
-	else
-		netif_receive_skb(skb);
+	ieee80211_rx_8023(rx, fast_rx, orig_len);
 
 	return true;
  drop:
 	dev_kfree_skb(skb);
+	if (fast_rx->uses_rss)
+		stats = this_cpu_ptr(sta->pcpu_rx_stats);
+
 	stats->dropped++;
 	return true;
 }
@@ -4506,6 +4538,43 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
 	return true;
 }
 
+static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
+				       struct ieee80211_sta *pubsta,
+				       struct sk_buff *skb,
+				       struct list_head *list)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_fast_rx *fast_rx;
+	struct ieee80211_rx_data rx;
+
+	memset(&rx, 0, sizeof(rx));
+	rx.skb = skb;
+	rx.local = local;
+	rx.list = list;
+
+	I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
+
+	/* drop frame if too short for header */
+	if (skb->len < sizeof(struct ethhdr))
+		goto drop;
+
+	if (!pubsta)
+		goto drop;
+
+	rx.sta = container_of(pubsta, struct sta_info, sta);
+	rx.sdata = rx.sta->sdata;
+
+	fast_rx = rcu_dereference(rx.sta->fast_rx);
+	if (!fast_rx)
+		goto drop;
+
+	ieee80211_rx_8023(&rx, fast_rx, skb->len);
+	return;
+
+drop:
+	dev_kfree_skb(skb);
+}
+
 /*
  * This is the actual Rx frames handler. as it belongs to Rx path it must
  * be called with rcu_read_lock protection.
@@ -4737,13 +4806,17 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 	 * if it was previously present.
 	 * Also, frames with less than 16 bytes are dropped.
 	 */
-	skb = ieee80211_rx_monitor(local, skb, rate);
+	if (!(status->flag & RX_FLAG_8023))
+		skb = ieee80211_rx_monitor(local, skb, rate);
 	if (skb) {
 		ieee80211_tpt_led_trig_rx(local,
 					  ((struct ieee80211_hdr *)skb->data)->frame_control,
 					  skb->len);
 
-		__ieee80211_rx_handle_packet(hw, pubsta, skb, list);
+		if (status->flag & RX_FLAG_8023)
+			__ieee80211_rx_handle_8023(hw, pubsta, skb, list);
+		else
+			__ieee80211_rx_handle_packet(hw, pubsta, skb, list);
 	}
 
 	kcov_remote_stop();
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 7afd07636b81..78b9d0c7cc58 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -71,6 +71,7 @@
  *	until pending frames are delivered
  * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption,
  *	so drop all packets without a key later.
+ * @WLAN_STA_DECAP_OFFLOAD: This station uses rx decap offload
  *
  * @NUM_WLAN_STA_FLAGS: number of defined flags
  */
@@ -102,6 +103,7 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_MPSP_RECIPIENT,
 	WLAN_STA_PS_DELIVER,
 	WLAN_STA_USES_ENCRYPTION,
+	WLAN_STA_DECAP_OFFLOAD,
 
 	NUM_WLAN_STA_FLAGS,
 };
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 601322e16957..8fcc39056402 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2761,7 +2761,7 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload,
 	TP_ARGS(local, sdata)
 );
 
-TRACE_EVENT(drv_sta_set_4addr,
+DECLARE_EVENT_CLASS(sta_flag_evt,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
 		 struct ieee80211_sta *sta, bool enabled),
@@ -2788,6 +2788,22 @@ TRACE_EVENT(drv_sta_set_4addr,
 	)
 );
 
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_sta *sta, bool enabled),
+
+	TP_ARGS(local, sdata, sta, enabled)
+);
+
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_sta *sta, bool enabled),
+
+	TP_ARGS(local, sdata, sta, enabled)
+);
+
 #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
-- 
2.28.0



* Re: [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
                   ` (5 preceding siblings ...)
  2020-12-16 20:43 ` [PATCH 7/7] mac80211: add rx decapsulation offload support Felix Fietkau
@ 2020-12-16 20:54 ` Johannes Berg
  2020-12-16 21:28   ` Felix Fietkau
  2020-12-17 11:51 ` Toke Høiland-Jørgensen
  7 siblings, 1 reply; 28+ messages in thread
From: Johannes Berg @ 2020-12-16 20:54 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless

On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
> 
> +static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
> +			fq_skb_free_t free_func)
> +{
> +	unsigned int packets = 0, bytes = 0, truesize = 0;
> +	struct fq_tin *tin = flow->tin;
> +	struct sk_buff *skb;
> +	int pending;
> +
> +	lockdep_assert_held(&fq->lock);
> +
> +	pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
> 

Why 32?

johannes



* Re: [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows
  2020-12-16 20:43 ` [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows Felix Fietkau
@ 2020-12-16 20:59   ` Johannes Berg
  2020-12-17 12:40     ` Felix Fietkau
  0 siblings, 1 reply; 28+ messages in thread
From: Johannes Berg @ 2020-12-16 20:59 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless

On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
> 
> +	u32 *flows_bitmap;

> +	fq->flows_bitmap[idx / 32] &= ~BIT(idx % 32);

> +	for (base = 0; base < fq->flows_cnt; base += 32) {
> +		u32 mask = fq->flows_bitmap[base / 32];

This all seems a little awkward, why not use unsigned long * and
<linux/bitops.h>?

The &=~ BIT() thing above is basically __clear_bit() then, the loops
later are basically for_each_set_bit() if I'm reading things right.

> +	if (!flow->backlog) {
> +		if (flow != &tin->default_flow)
> +			fq->flows_bitmap[idx / 32] |= BIT(idx % 32);

That could be __set_bit()

> +	fq->flows_bitmap = kcalloc(DIV_ROUND_UP(fq->flows_cnt, 32), sizeof(u32),
> +				   GFP_KERNEL);

And that would just use BITS_TO_BYTES()?
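
For illustration, a rough sketch of what the scan could then look like,
assuming flows_bitmap becomes an unsigned long * sized with
bitmap_zalloc() (untested; the loop over the tins' default flows would
stay as in the patch):

static struct fq_flow *fq_find_fattest_flow(struct fq *fq)
{
        struct fq_flow *flow = NULL;
        unsigned int i;
        u32 len = 0;

        /* only flows with backlog have their bit set: __set_bit() on
         * enqueue, __clear_bit() in __fq_adjust_removal() once the
         * flow drains */
        for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) {
                struct fq_flow *cur = &fq->flows[i];

                if (cur->backlog <= len)
                        continue;

                flow = cur;
                len = cur->backlog;
        }

        /* ... then compare against each tin's default_flow as before ... */

        return flow;
}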

johannes



* Re: [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 20:43 ` [PATCH 7/7] mac80211: add rx decapsulation offload support Felix Fietkau
@ 2020-12-16 21:03   ` Johannes Berg
  2020-12-16 21:19     ` Felix Fietkau
  2020-12-16 21:04   ` Johannes Berg
  1 sibling, 1 reply; 28+ messages in thread
From: Johannes Berg @ 2020-12-16 21:03 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless

On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
> 
> +	offload = assign &&
> +		  (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
> +
> +	if (offload)
> +		set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
> +	else
> +		set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
> +
> +	if (set_offload)
> +		drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);

Some of these lines look a bit long?

> -	skb = ieee80211_rx_monitor(local, skb, rate);
> +	if (!(status->flag & RX_FLAG_8023))
> +		skb = ieee80211_rx_monitor(local, skb, rate);

Is that worth the check? You basically disable it anyway if monitor
interfaces are there.

Not sure that's really the right thing to do ... we often want monitor
interfaces (with no flags set) for debug?

Or maybe we should add some tracing then (instead).

johannes



* Re: [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 20:43 ` [PATCH 7/7] mac80211: add rx decapsulation offload support Felix Fietkau
  2020-12-16 21:03   ` Johannes Berg
@ 2020-12-16 21:04   ` Johannes Berg
  2020-12-16 21:06     ` Felix Fietkau
  1 sibling, 1 reply; 28+ messages in thread
From: Johannes Berg @ 2020-12-16 21:04 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless

Wait, another thing:

> +++ b/net/mac80211/iface.c
> @@ -798,10 +798,21 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
>  		flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED;
>  	}
>  
> +	if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) &&
> +	    ieee80211_iftype_supports_encap_offload(sdata->vif.type)) {
> +		flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;

Why does decap depend on encap here?

johannes




* Re: [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 21:04   ` Johannes Berg
@ 2020-12-16 21:06     ` Felix Fietkau
  0 siblings, 0 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 21:06 UTC (permalink / raw)
  To: Johannes Berg, linux-wireless


On 2020-12-16 22:04, Johannes Berg wrote:
> Wait, another thing:
> 
>> +++ b/net/mac80211/iface.c
>> @@ -798,10 +798,21 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
>>  		flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED;
>>  	}
>>  
>> +	if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) &&
>> +	    ieee80211_iftype_supports_encap_offload(sdata->vif.type)) {
>> +		flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;
> 
> Why does decap depend on encap here?
It's supported for the same interface types, so I didn't feel like
duplicating the function. I guess I could rename it to
ieee80211_iftype_supports_hdr_offload to make that clearer.

- Felix


* Re: [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 21:03   ` Johannes Berg
@ 2020-12-16 21:19     ` Felix Fietkau
  2020-12-17  8:08       ` Johannes Berg
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 21:19 UTC (permalink / raw)
  To: Johannes Berg, linux-wireless

On 2020-12-16 22:03, Johannes Berg wrote:
> On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
>> 
>> +	offload = assign &&
>> +		  (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
>> +
>> +	if (offload)
>> +		set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
>> +	else
>> +		set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
>> +
>> +	if (set_offload)
>> +		drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);
> 
> Some of these lines look a bit long?
Just a tiny bit over 80 characters. Wasn't the 80-character line limit
removed a while back? I don't think line wrapping would make things more
readable here.

>> -	skb = ieee80211_rx_monitor(local, skb, rate);
>> +	if (!(status->flag & RX_FLAG_8023))
>> +		skb = ieee80211_rx_monitor(local, skb, rate);
> 
> Is that worth the check? You basically disable it anyway if monitor
> interfaces are there.
There could be a race. The driver or hw might have queued up some 802.3
frames after offload was disabled.

> Not sure that's really the right thing to do ... we often want monitor
> interfaces (with no flags set) for debug?
> 
> Or maybe we should add some tracing then (instead).
Tracing probably makes more sense. I'm not sure pcap or radiotap can
deal with a mix of 802.3 and 802.11 packets. Leaving offload enabled and
silently dropping 802.3 packets seems like a bad idea to me as well.

- Felix



* Re: [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory
  2020-12-16 20:54 ` [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Johannes Berg
@ 2020-12-16 21:28   ` Felix Fietkau
  2020-12-17 12:09     ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-16 21:28 UTC (permalink / raw)
  To: Johannes Berg, linux-wireless

On 2020-12-16 21:54, Johannes Berg wrote:
> On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
>> 
>> +static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
>> +			fq_skb_free_t free_func)
>> +{
>> +	unsigned int packets = 0, bytes = 0, truesize = 0;
>> +	struct fq_tin *tin = flow->tin;
>> +	struct sk_buff *skb;
>> +	int pending;
>> +
>> +	lockdep_assert_held(&fq->lock);
>> +
>> +	pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
>> 
> 
> Why 32?
I guess I forgot to make it configurable. sch_fq_codel uses 64, but
that seemed a bit excessive to me.

- Felix


* Re: [PATCH 7/7] mac80211: add rx decapsulation offload support
  2020-12-16 21:19     ` Felix Fietkau
@ 2020-12-17  8:08       ` Johannes Berg
  0 siblings, 0 replies; 28+ messages in thread
From: Johannes Berg @ 2020-12-17  8:08 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless

On Wed, 2020-12-16 at 22:19 +0100, Felix Fietkau wrote:
> On 2020-12-16 22:03, Johannes Berg wrote:
> > On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
> > > +	offload = assign &&
> > > +		  (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
> > > +
> > > +	if (offload)
> > > +		set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
> > > +	else
> > > +		set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
> > > +
> > > +	if (set_offload)
> > > +		drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);
> > 
> > Some of these lines look a bit long?
> Just a tiny bit over 80 characters. Wasn't the 80-character line limit
> removed a while back? I don't think line wrapping would make things more
> readable here.

Ok, fair point, I didn't count :-)

> > Not sure that's really the right thing to do ... we often want monitor
> > interfaces (with no flags set) for debug?
> > 
> > Or maybe we should add some tracing then (instead).
> Tracing probably makes more sense. I'm not sure pcap or radiotap can
> deal with a mix of 802.3 and 802.11 packets. Leaving offload enabled and
> silently dropping 802.3 packets seems like a bad idea to me as well.

Right. I've long felt that perhaps we should have tracing for this,
rather than relying on the extra monitor interface, but the monitor
interface is oh so convenient since you can directly use the result for
wireshark etc. :)

I guess I don't really care that much either way. Going with your
approach seems reasonable, but people will have to be aware that "just
add a monitor interface" is now going to affect things more than it used
to.

johannes



* Re: [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory
  2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
                   ` (6 preceding siblings ...)
  2020-12-16 20:54 ` [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Johannes Berg
@ 2020-12-17 11:51 ` Toke Høiland-Jørgensen
  7 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 11:51 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> This is similar to what sch_fq_codel does. It also amortizes the
> worst-case cost of a follow-up patch that changes the selection of the
> biggest flow for dropping packets.
>
> Signed-off-by: Felix Fietkau <nbd@nbd.name>

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>



* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-16 20:43 ` [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing Felix Fietkau
@ 2020-12-17 11:54   ` Toke Høiland-Jørgensen
  2020-12-17 12:20     ` Felix Fietkau
  0 siblings, 1 reply; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 11:54 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> Depending on the source, a hardware calculated hash may not provide the
> same level of collision resistance.

This seems like it would have performance implications?

Also, this can potentially discard information from tunnels that
preserve the hash before encapsulation (we added support for this to
Wireguard which had some nice effects on queueing of encapsulated
traffic).

Could you elaborate a bit on the kind of bad hardware hashes you were
seeing?

-Toke



* Re: [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin
  2020-12-16 20:43 ` [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin Felix Fietkau
@ 2020-12-17 11:55   ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 11:55 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> Simplifies the code and prepares for a rework of scanning for flows on
> overmemory drop.
>
> Signed-off-by: Felix Fietkau <nbd@nbd.name>

This seems reasonable.

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>



* Re: [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory
  2020-12-16 21:28   ` Felix Fietkau
@ 2020-12-17 12:09     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 12:09 UTC (permalink / raw)
  To: Felix Fietkau, Johannes Berg, linux-wireless

Felix Fietkau <nbd@nbd.name> writes:

> On 2020-12-16 21:54, Johannes Berg wrote:
>> On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
>>> 
>>> +static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
>>> +			fq_skb_free_t free_func)
>>> +{
>>> +	unsigned int packets = 0, bytes = 0, truesize = 0;
>>> +	struct fq_tin *tin = flow->tin;
>>> +	struct sk_buff *skb;
>>> +	int pending;
>>> +
>>> +	lockdep_assert_held(&fq->lock);
>>> +
>>> +	pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
>>> 
>> 
>> Why 32?
> I guess I forgot to make it configurable. sch_fq_codel uses 64, but
> that seemed a bit excessive to me.

I'm not sure it's worth a configuration knob. It's basically an
arbitrary choice anyway, and only kicks in when an unresponsive flow
keeps flooding a queue to the point of overflow (if it's many smaller
flows, the "never drop more than half a flow's backlog" rule should keep
it from being excessive).

This (hopefully) only happens relatively rarely and hitting it with a
really big hammer is the right thing to do in such a case to keep the
box from falling over. Not sure if 32 or 64 makes much difference; guess
it depends on the CPU-to-bandwidth ratio of the particular machine.

-Toke



* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 11:54   ` Toke Høiland-Jørgensen
@ 2020-12-17 12:20     ` Felix Fietkau
  2020-12-17 13:01       ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-17 12:20 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, linux-wireless; +Cc: johannes


On 2020-12-17 12:54, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <nbd@nbd.name> writes:
> 
>> Depending on the source, a hardware calculated hash may not provide the
>> same level of collision resistance.
> 
> This seems like it would have performance implications?
> 
> Also, this can potentially discard information from tunnels that
> preserve the hash before encapsulation (we added support for this to
> Wireguard which had some nice effects on queueing of encapsulated
> traffic).
If the hash was calculated in software using the flow dissector, it will
be preserved, even if it went through a few virtual interfaces.
The only hashes discarded are hardware generated ones.
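
Roughly, the check amounts to something like this (a minimal sketch
using the generic skbuff helpers, not necessarily the exact hunk from
the patch):

        /* only trust a hash that was computed in software; otherwise
         * drop it so skb_get_hash() recomputes it via the flow
         * dissector
         */
        if (!skb->sw_hash)
                skb_clear_hash(skb);

        u32 hash = skb_get_hash(skb);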

- Felix

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows
  2020-12-16 20:59   ` Johannes Berg
@ 2020-12-17 12:40     ` Felix Fietkau
  0 siblings, 0 replies; 28+ messages in thread
From: Felix Fietkau @ 2020-12-17 12:40 UTC (permalink / raw)
  To: Johannes Berg, linux-wireless

On 2020-12-16 21:59, Johannes Berg wrote:
> On Wed, 2020-12-16 at 21:43 +0100, Felix Fietkau wrote:
>> 
>> +	u32 *flows_bitmap;
> 
>> +	fq->flows_bitmap[idx / 32] &= ~BIT(idx % 32);
> 
>> +	for (base = 0; base < fq->flows_cnt; base += 32) {
>> +		u32 mask = fq->flows_bitmap[base / 32];
> 
> This all seems a little awkward, why not use unsigned long * and
> <linux/bitops.h>?
> 
> The &=~ BIT() thing above is basically __clear_bit() then, the loops
> later are basically for_each_set_bit() if I'm reading things right.
I guess I can simplify this some more. I think I avoided
for_each_set_bit for performance reasons to keep things inline in the
loop, but I'm not sure how much that matters in practice.
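
Something like this is presumably what you have in mind (a rough
sketch, assuming flows_bitmap is turned into an unsigned long *
allocated with BITS_TO_LONGS(fq->flows_cnt) words):

        /* instead of fq->flows_bitmap[idx / 32] &= ~BIT(idx % 32): */
        __clear_bit(idx, fq->flows_bitmap);

        /* instead of the open-coded base/mask scan: */
        unsigned int i;

        for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) {
                struct fq_flow *flow = &fq->flows[i];

                /* ... look at flow->backlog ... */
        }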

- Felix

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 12:20     ` Felix Fietkau
@ 2020-12-17 13:01       ` Toke Høiland-Jørgensen
  2020-12-17 15:48         ` Felix Fietkau
  0 siblings, 1 reply; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 13:01 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> On 2020-12-17 12:54, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <nbd@nbd.name> writes:
>> 
>>> Depending on the source, a hardware calculated hash may not provide the
>>> same level of collision resistance.
>> 
>> This seems like it would have performance implications?
>> 
>> Also, this can potentially discard information from tunnels that
>> preserve the hash before encapsulation (we added support for this to
>> Wireguard which had some nice effects on queueing of encapsulated
>> traffic).
> If the hash was calculated in software using the flow dissector, it will
> be preserved, even if it went through a few virtual interfaces.
> The only hashes discarded are hardware generated ones.

Yeah, but I was thinking something like:

Packet comes in with HW hash -> gets encapsulated (preserving the hash)
-> gets to mac80211 which discards the HW hash. So now you're replacing
a (possibly bad-quality) HW hash with a software hash of the *outer*
encapsulation header...

-Toke


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 13:01       ` Toke Høiland-Jørgensen
@ 2020-12-17 15:48         ` Felix Fietkau
  2020-12-17 17:26           ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-17 15:48 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, linux-wireless; +Cc: johannes


On 2020-12-17 14:01, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <nbd@nbd.name> writes:
> 
>> On 2020-12-17 12:54, Toke Høiland-Jørgensen wrote:
>>> Felix Fietkau <nbd@nbd.name> writes:
>>> 
>>>> Depending on the source, a hardware calculated hash may not provide the
>>>> same level of collision resistance.
>>> 
>>> This seems like it would have performance implications?
>>> 
>>> Also, this can potentially discard information from tunnels that
>>> preserve the hash before encapsulation (we added support for this to
>>> Wireguard which had some nice effects on queueing of encapsulated
>>> traffic).
>> If the hash was calculated in software using the flow dissector, it will
>> be preserved, even if it went through a few virtual interfaces.
>> The only hashes discarded are hardware generated ones.
> 
> Yeah, but I was thinking something like:
> 
> Packet comes in with HW hash -> gets encapsulated (preserving the hash)
> -> gets to mac80211 which discards the HW hash. So now you're replacing
> a (possibly bad-quality) HW hash with a software hash of the *outer*
> encapsulation header...
If this becomes a problem, I think we should add a similar patch to
wireguard, which already calls skb_get_hash before encapsulating.
Other regular tunnels should already get a proper hash, since the flow
dissector will take care of it.

The reason I did this patch is because I have a patch to set the hw flow
hash in the skb on mtk_eth_soc, which does help GRO, but leads to
collisions on mac80211 fq.

- Felix

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 15:48         ` Felix Fietkau
@ 2020-12-17 17:26           ` Toke Høiland-Jørgensen
  2020-12-17 19:07             ` Felix Fietkau
  0 siblings, 1 reply; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-17 17:26 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> On 2020-12-17 14:01, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <nbd@nbd.name> writes:
>> 
>>> On 2020-12-17 12:54, Toke Høiland-Jørgensen wrote:
>>>> Felix Fietkau <nbd@nbd.name> writes:
>>>> 
>>>>> Depending on the source, a hardware calculated hash may not provide the
>>>>> same level of collision resistance.
>>>> 
>>>> This seems like it would have performance implications?
>>>> 
>>>> Also, this can potentially discard information from tunnels that
>>>> preserve the hash before encapsulation (we added support for this to
>>>> Wireguard which had some nice effects on queueing of encapsulated
>>>> traffic).
>>> If the hash was calculated in software using the flow dissector, it will
>>> be preserved, even if it went through a few virtual interfaces.
>>> The only hashes discarded are hardware generated ones.
>> 
>> Yeah, but I was thinking something like:
>> 
>> Packet comes in with HW hash -> gets encapsulated (preserving the hash)
>> -> gets to mac80211 which discards the HW hash. So now you're replacing
>> a (possibly bad-quality) HW hash with a software hash of the *outer*
>> encapsulation header...
> If this becomes a problem, I think we should add a similar patch to
> wireguard, which already calls skb_get_hash before encapsulating.
> Other regular tunnels should already get a proper hash, since the flow
> dissector will take care of it.

But then we'd need to go around adding this to all the places that uses
the hash just to work around a particular piece of broken(ish) hardware.
And we're hard-coding a behaviour in mac80211 that means we'll *always*
recompute the hash, even for hardware that's not similarly broken.

> The reason I did this patch is because I have a patch to set the hw flow
> hash in the skb on mtk_eth_soc, which does help GRO, but leads to
> collisions on mac80211 fq.

So wouldn't the right thing to do here be to put a flag into the RX
device that makes the stack clear the hash after using it for GRO?

-Toke


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 17:26           ` Toke Høiland-Jørgensen
@ 2020-12-17 19:07             ` Felix Fietkau
  2020-12-18 12:41               ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-17 19:07 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, linux-wireless; +Cc: johannes


On 2020-12-17 18:26, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <nbd@nbd.name> writes:
>> If this becomes a problem, I think we should add a similar patch to
>> wireguard, which already calls skb_get_hash before encapsulating.
>> Other regular tunnels should already get a proper hash, since the flow
>> dissector will take care of it.
> 
> But then we'd need to go around adding this to all the places that uses
> the hash just to work around a particular piece of broken(ish) hardware.
> And we're hard-coding a behaviour in mac80211 that means we'll *always*
> recompute the hash, even for hardware that's not similarly broken.
> 
>> The reason I did this patch is because I have a patch to set the hw flow
>> hash in the skb on mtk_eth_soc, which does help GRO, but leads to
>> collisions on mac80211 fq.
> 
> So wouldn't the right thing to do here be to put a flag into the RX
> device that makes the stack clear the hash after using it for GRO?
I don't think the hardware is broken, I think fq is simply making
assumptions about the hash that aren't met by the hw.

The documentation in include/linux/skbuff.h mentions these requirements
for the skb hash:
 * 1) Two packets in different flows have different hash values
 * 2) Two packets in the same flow should have the same hash value

FWIW, I think the 'should' from 2) probably belongs to 1), otherwise it
makes no sense. Two packets of the flow must return the same hash,
otherwise the hash is broken. I'm assuming this is a typo.

In addition to those properties, fq needs the hash to be
cryptographically secure, so that it can use reciprocal_scale to sort
flows into buckets without allowing an attacker to craft collisions.
That's also the reason why it used to use skb_get_hash_perturb with a
random perturbation until we got software hashes based on siphash.
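
For reference, the bucket selection in fq_impl.h is essentially (give
or take the surrounding code):

        u32 hash = skb_get_hash(skb);
        u32 idx = reciprocal_scale(hash, fq->flows_cnt);
        struct fq_flow *flow = &fq->flows[idx];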

I think it's safe to assume that most hardware out there will not
provide collision resistant hashes, so in my opinion fq cannot rely on a
hardware hash. We don't need to go around and change all places that use
the hash, just those that assume a collision resistant one.

- Felix

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-17 19:07             ` Felix Fietkau
@ 2020-12-18 12:41               ` Toke Høiland-Jørgensen
  2020-12-18 13:40                 ` Felix Fietkau
  0 siblings, 1 reply; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-18 12:41 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> On 2020-12-17 18:26, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <nbd@nbd.name> writes:
>>> If this becomes a problem, I think we should add a similar patch to
>>> wireguard, which already calls skb_get_hash before encapsulating.
>>> Other regular tunnels should already get a proper hash, since the flow
>>> dissector will take care of it.
>> 
>> But then we'd need to go around adding this to all the places that uses
>> the hash just to work around a particular piece of broken(ish) hardware.
>> And we're hard-coding a behaviour in mac80211 that means we'll *always*
>> recompute the hash, even for hardware that's not similarly broken.
>> 
>>> The reason I did this patch is because I have a patch to set the hw flow
>>> hash in the skb on mtk_eth_soc, which does help GRO, but leads to
>>> collisions on mac80211 fq.
>> 
>> So wouldn't the right thing to do here be to put a flag into the RX
>> device that makes the stack clear the hash after using it for GRO?
> I don't think the hardware is broken, I think fq is simply making
> assumptions about the hash that aren't met by the hw.
>
> The documentation in include/linux/skbuff.h mentions these requirements
> for the skb hash:
>  * 1) Two packets in different flows have different hash values
>  * 2) Two packets in the same flow should have the same hash value
>
> FWIW, I think the 'should' from 2) probably belongs to 1), otherwise it
> makes no sense. Two packets of the flow must return the same hash,
> otherwise the hash is broken. I'm assuming this is a typo.

There's some text further down indicating this is deliberate:

 * A driver may indicate a hash level which is less specific than the
 * actual layer the hash was computed on. For instance, a hash computed
 * at L4 may be considered an L3 hash. This should only be done if the
 * driver can't unambiguously determine that the HW computed the hash at
 * the higher layer. Note that the "should" in the second property above
 * permits this.

So the way I'm reading that whole section, either the intent is that
both properties should be fulfilled, or that the first one (being
collision-free) is more important...

> In addition to those properties, fq needs the hash to be
> cryptographically secure, so that it can use reciprocal_scale to sort
> flows into buckets without allowing an attacker to craft collisions.
> That's also the reason why it used to use skb_get_hash_perturb with a
> random perturbation until we got software hashes based on siphash.
>
> I think it's safe to assume that most hardware out there will not
> provide collision resistant hashes, so in my opinion fq cannot rely on a
> hardware hash. We don't need to go around and change all places that use
> the hash, just those that assume a collision resistant one.

I did a quick grep-based survey of uses of skb_get_hash() outside
drivers - this is what I found (with my interpretations of what they're
used for):

net/core/dev.c           : skb_tx_hash() - selecting TX queue w/reciprocal scale
net/core/dev.c           : RX flow steering, flow limiting
net/core/dev.c           : GRO
net/core/filter.c        : BPF helper
include/net/ip_tunnels.h : flowi4_multipath_hash - so multipath selection?
net/ipv{4,6}/route.c     : multipath hashing (if l4)
net/ipv6/seg6_iptunnel   : building flow labels
net/mac80211/tx.c        : FQ
net/mptcp/syncookies     : storing cookies (XOR w/net_hash_mix())
net/netfilter/nft_hash.c : symhash input (seems to be load balancing)
net/openvswitch          : flow hashing and actions
net/packet/af_packet.c   : PACKET_FANOUT_HASH
net/sched/sch_*.c        : flow hashing for queueing

Apart from GRO it's not obvious to me that a trivially
attacker-controlled hash is safe in any of those uses?

-Toke


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-18 12:41               ` Toke Høiland-Jørgensen
@ 2020-12-18 13:40                 ` Felix Fietkau
  2020-12-18 15:49                   ` Toke Høiland-Jørgensen
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Fietkau @ 2020-12-18 13:40 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen, linux-wireless; +Cc: johannes

On 2020-12-18 13:41, Toke Høiland-Jørgensen wrote:
> Felix Fietkau <nbd@nbd.name> writes:
> 
>> On 2020-12-17 18:26, Toke Høiland-Jørgensen wrote:
>>> Felix Fietkau <nbd@nbd.name> writes:
>>>> If this becomes a problem, I think we should add a similar patch to
>>>> wireguard, which already calls skb_get_hash before encapsulating.
>>>> Other regular tunnels should already get a proper hash, since the flow
>>>> dissector will take care of it.
>>> 
>>> But then we'd need to go around adding this to all the places that uses
>>> the hash just to work around a particular piece of broken(ish) hardware.
>>> And we're hard-coding a behaviour in mac80211 that means we'll *always*
>>> recompute the hash, even for hardware that's not similarly broken.
>>> 
>>>> The reason I did this patch is because I have a patch to set the hw flow
>>>> hash in the skb on mtk_eth_soc, which does help GRO, but leads to
>>>> collisions on mac80211 fq.
>>> 
>>> So wouldn't the right thing to do here be to put a flag into the RX
>>> device that makes the stack clear the hash after using it for GRO?
>> I don't think the hardware is broken, I think fq is simply making
>> assumptions about the hash that aren't met by the hw.
>>
>> The documentation in include/linux/skbuff.h mentions these requirements
>> for the skb hash:
>>  * 1) Two packets in different flows have different hash values
>>  * 2) Two packets in the same flow should have the same hash value
>>
>> FWIW, I think the 'should' from 2) probably belongs to 1), otherwise it
>> makes no sense. Two packets of the flow must return the same hash,
>> otherwise the hash is broken. I'm assuming this is a typo.
> 
> There's some text further down indicating this is deliberate:
> 
>  * A driver may indicate a hash level which is less specific than the
>  * actual layer the hash was computed on. For instance, a hash computed
>  * at L4 may be considered an L3 hash. This should only be done if the
>  * driver can't unambiguously determine that the HW computed the hash at
>  * the higher layer. Note that the "should" in the second property above
>  * permits this.
> 
> So the way I'm reading that whole section, either the intent is that
> both properties should be fulfilled, or that the first one (being
> collision-free) is more important...
A hash - by definition - cannot be collision free.
But that's beside the point. On my hw, the hash itself seems collision
free for the flows that I'm pushing, but the result of the
reciprocal_scale isn't.
I took another look and figured out the reason for that:
The hw delivers a 14 bit hash. reciprocal_scale assumes that the values
are distributed across the full 32 bit range. So in this case, the lower
bits are pretty much ignored and the result of the reciprocal_scale is 0
or close to 0, which is what's causing the collisions in fq.
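
To spell that out: reciprocal_scale(val, n) is essentially
((u64)val * n) >> 32, so with a 14-bit hash and e.g. 4096 fq buckets:

        max 14-bit hash:  0x3fff = 16383
        (16383ULL * 4096) >> 32  ==  0

i.e. every packet lands in bucket 0 unless the hash is first spread
over the full 32-bit range.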

Maybe the assumption that the hash should be distributed across the full
32 bit range should be documented somewhere :)

>> In addition to those properties, fq needs the hash to be
>> cryptographically secure, so that it can use reciprocal_scale to sort
>> flows into buckets without allowing an attacker to craft collisions.
>> That's also the reason why it used to use skb_get_hash_perturb with a
>> random perturbation until we got software hashes based on siphash.
>>
>> I think it's safe to assume that most hardware out there will not
>> provide collision resistant hashes, so in my opinion fq cannot rely on a
>> hardware hash. We don't need to go around and change all places that use
>> the hash, just those that assume a collision resistant one.
> 
> I did a quick grep-based survey of uses of skb_get_hash() outside
> drivers - this is what I found (with my interpretations of what they're
> used for):
> 
> net/core/dev.c           : skb_tx_hash() - selecting TX queue w/reciprocal scale
> net/core/dev.c           : RX flow steering, flow limiting
> net/core/dev.c           : GRO
> net/core/filter.c        : BPF helper
> include/net/ip_tunnels.h : flowi4_multipath_hash - so multipath selection?
> net/ipv{4,6}/route.c     : multipath hashing (if l4)
> net/ipv6/seg6_iptunnel   : building flow labels
> net/mac80211/tx.c        : FQ
> net/mptcp/syncookies     : storing cookies (XOR w/net_hash_mix())
> net/netfilter/nft_hash.c : symhash input (seems to be load balancing)
> net/openvswitch          : flow hashing and actions
> net/packet/af_packet.c   : PACKET_FANOUT_HASH
> net/sched/sch_*.c        : flow hashing for queueing
> 
> Apart from GRO it's not obvious to me that a trivially
> attacker-controlled hash is safe in any of those uses?
I looked at some of those uses you mentioned here.
Most of them fit into 2 categories:
1. Sort into power-of-2 buckets and use hash & (size-1), effectively
using the lower bits only.
2. Use reciprocal_scale - effectively using the higher bits only.
For the hash that my hw is reporting, type 1 is working and type 2 is
broken.

So it seems to me that the solution would involve running a simple hash
on the 14 bit values to get the bits distributed to the full 32 bit
range without adding too much bias.
I will do this in the driver and drop this patch.
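
Something along these lines (a hypothetical sketch of the driver-side
fix, not the actual mtk_eth_soc change; hw_hash stands for the 14-bit
value read from the rx descriptor):

        #include <linux/jhash.h>

        /* spread the 14-bit value over the full 32-bit range before
         * handing it to the stack; hash type is an assumption about
         * what the hw actually hashed
         */
        skb_set_hash(skb, jhash_1word(hw_hash, 0), PKT_HASH_TYPE_L4);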

Thanks for looking into this,

- Felix

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing
  2020-12-18 13:40                 ` Felix Fietkau
@ 2020-12-18 15:49                   ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 28+ messages in thread
From: Toke Høiland-Jørgensen @ 2020-12-18 15:49 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> On 2020-12-18 13:41, Toke Høiland-Jørgensen wrote:
>> Felix Fietkau <nbd@nbd.name> writes:
>> 
>>> On 2020-12-17 18:26, Toke Høiland-Jørgensen wrote:
>>>> Felix Fietkau <nbd@nbd.name> writes:
>>>>> If this becomes a problem, I think we should add a similar patch to
>>>>> wireguard, which already calls skb_get_hash before encapsulating.
>>>>> Other regular tunnels should already get a proper hash, since the flow
>>>>> dissector will take care of it.
>>>> 
>>>> But then we'd need to go around adding this to all the places that uses
>>>> the hash just to work around a particular piece of broken(ish) hardware.
>>>> And we're hard-coding a behaviour in mac80211 that means we'll *always*
>>>> recompute the hash, even for hardware that's not similarly broken.
>>>> 
>>>>> The reason I did this patch is because I have a patch to set the hw flow
>>>>> hash in the skb on mtk_eth_soc, which does help GRO, but leads to
>>>>> collisions on mac80211 fq.
>>>> 
>>>> So wouldn't the right thing to do here be to put a flag into the RX
>>>> device that makes the stack clear the hash after using it for GRO?
>>> I don't think the hardware is broken, I think fq is simply making
>>> assumptions about the hash that aren't met by the hw.
>>>
>>> The documentation in include/linux/skbuff.h mentions these requirements
>>> for the skb hash:
>>>  * 1) Two packets in different flows have different hash values
>>>  * 2) Two packets in the same flow should have the same hash value
>>>
>>> FWIW, I think the 'should' from 2) probably belongs to 1), otherwise it
>>> makes no sense. Two packets of the flow must return the same hash,
>>> otherwise the hash is broken. I'm assuming this is a typo.
>> 
>> There's some text further down indicating this is deliberate:
>> 
>>  * A driver may indicate a hash level which is less specific than the
>>  * actual layer the hash was computed on. For instance, a hash computed
>>  * at L4 may be considered an L3 hash. This should only be done if the
>>  * driver can't unambiguously determine that the HW computed the hash at
>>  * the higher layer. Note that the "should" in the second property above
>>  * permits this.
>> 
>> So the way I'm reading that whole section, either the intent is that
>> both properties should be fulfilled, or that the first one (being
>> collision-free) is more important...
> A hash - by definition - cannot be collision free.
> But that's beside the point. On my hw, the hash itself seems collision
> free for the flows that I'm pushing, but the result of the
> reciprocal_scale isn't.
> I took another look and figured out the reason for that:
> The hw delivers a 14 bit hash. reciprocal_scale assumes that the values
> are distributed across the full 32 bit range. So in this case, the lower
> bits are pretty much ignored and the result of the reciprocal_scale is 0
> or close to 0, which is what's causing the collisions in fq.

Ah, right, that makes sense!

> Maybe the assumption that the hash should be distributed across the full
> 32 bit range should be documented somewhere :)

Yeah, I agree. Maybe just updating that comment in skbuff.h? Do you want
to fold such an update into your series? Otherwise I can send a patch
once net-next opens...

>>> In addition to those properties, fq needs the hash to be
>>> cryptographically secure, so that it can use reciprocal_scale to sort
>>> flows into buckets without allowing an attacker to craft collisions.
>>> That's also the reason why it used to use skb_get_hash_perturb with a
>>> random perturbation until we got software hashes based on siphash.
>>>
>>> I think it's safe to assume that most hardware out there will not
>>> provide collision resistant hashes, so in my opinion fq cannot rely on a
>>> hardware hash. We don't need to go around and change all places that use
>>> the hash, just those that assume a collision resistant one.
>> 
>> I did a quick grep-based survey of uses of skb_get_hash() outside
>> drivers - this is what I found (with my interpretations of what they're
>> used for):
>> 
>> net/core/dev.c           : skb_tx_hash() - selecting TX queue w/reciprocal scale
>> net/core/dev.c           : RX flow steering, flow limiting
>> net/core/dev.c           : GRO
>> net/core/filter.c        : BPF helper
>> include/net/ip_tunnels.h : flowi4_multipath_hash - so multipath selection?
>> net/ipv{4,6}/route.c     : multipath hashing (if l4)
>> net/ipv6/seg6_iptunnel   : building flow labels
>> net/mac80211/tx.c        : FQ
>> net/mptcp/syncookies     : storing cookies (XOR w/net_hash_mix())
>> net/netfilter/nft_hash.c : symhash input (seems to be load balancing)
>> net/openvswitch          : flow hashing and actions
>> net/packet/af_packet.c   : PACKET_FANOUT_HASH
>> net/sched/sch_*.c        : flow hashing for queueing
>> 
>> Apart from GRO it's not obvious to me that a trivially
>> attacker-controlled hash is safe in any of those uses?
> I looked at some of those uses you mentioned here.
> Most of them fit into 2 categories:
> 1. Sort into power-of-2 buckets and use hash & (size-1), effectively
> using the lower bits only.
> 2. Use reciprocal_scale - effectively using the higher bits only.
> For the hash that my hw is reporting, type 1 is working and type 2 is
> broken.
>
> So it seems to me that the solution would involve running a simple hash
> on the 14 bit values to get the bits distributed to the full 32 bit
> range without adding too much bias.
> I will do this in the driver and drop this patch.

Yes, this seems like a reasonable solution; great!

> Thanks for looking into this,

You're welcome :)

-Toke


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2020-12-18 15:50 UTC | newest]

Thread overview: 28+ messages
2020-12-16 20:43 [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Felix Fietkau
2020-12-16 20:43 ` [PATCH 2/7] mac80211: force calculation of software hash for tx fair queueing Felix Fietkau
2020-12-17 11:54   ` Toke Høiland-Jørgensen
2020-12-17 12:20     ` Felix Fietkau
2020-12-17 13:01       ` Toke Høiland-Jørgensen
2020-12-17 15:48         ` Felix Fietkau
2020-12-17 17:26           ` Toke Høiland-Jørgensen
2020-12-17 19:07             ` Felix Fietkau
2020-12-18 12:41               ` Toke Høiland-Jørgensen
2020-12-18 13:40                 ` Felix Fietkau
2020-12-18 15:49                   ` Toke Høiland-Jørgensen
2020-12-16 20:43 ` [PATCH 3/7] net/fq_impl: drop get_default_func, move default flow to fq_tin Felix Fietkau
2020-12-17 11:55   ` Toke Høiland-Jørgensen
2020-12-16 20:43 ` [PATCH 4/7] net/fq_impl: do not maintain a backlog-sorted list of flows Felix Fietkau
2020-12-16 20:59   ` Johannes Berg
2020-12-17 12:40     ` Felix Fietkau
2020-12-16 20:43 ` [PATCH 5/7] mac80211: fix encryption key selection for 802.3 xmit Felix Fietkau
2020-12-16 20:43 ` [PATCH 6/7] mac80211: fix fast-rx encryption check Felix Fietkau
2020-12-16 20:43 ` [PATCH 7/7] mac80211: add rx decapsulation offload support Felix Fietkau
2020-12-16 21:03   ` Johannes Berg
2020-12-16 21:19     ` Felix Fietkau
2020-12-17  8:08       ` Johannes Berg
2020-12-16 21:04   ` Johannes Berg
2020-12-16 21:06     ` Felix Fietkau
2020-12-16 20:54 ` [PATCH 1/7] net/fq_impl: bulk-free packets from a flow on overmemory Johannes Berg
2020-12-16 21:28   ` Felix Fietkau
2020-12-17 12:09     ` Toke Høiland-Jørgensen
2020-12-17 11:51 ` Toke Høiland-Jørgensen
