linux-wireless.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] mac80211: add a function for running rx without passing skbs to the stack
@ 2020-07-25 18:55 Felix Fietkau
  2020-07-25 19:14 ` Johannes Berg
  0 siblings, 1 reply; 5+ messages in thread
From: Felix Fietkau @ 2020-07-25 18:55 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes

This can be used to run mac80211 rx processing on a batch of frames in NAPI
poll before passing them to the network stack in a large batch.
This can improve icache footprint, or it can be used to pass frames via
netif_receive_skb_list.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/mac80211.h     | 25 +++++++++++++++++
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/rx.c          | 56 +++++++++++++++++++++++---------------
 3 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 11d5610d2ad5..cc9c477184a4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4357,6 +4357,31 @@ void ieee80211_free_hw(struct ieee80211_hw *hw);
  */
 void ieee80211_restart_hw(struct ieee80211_hw *hw);
 
+/**
+ * ieee80211_rx_list - receive frame and store processed skbs in a list
+ *
+ * Use this function to hand received frames to mac80211. The receive
+ * buffer in @skb must start with an IEEE 802.11 header. In case of a
+ * paged @skb is used, the driver is recommended to put the ieee80211
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
+ *
+ * This function may not be called in IRQ context. Calls to this function
+ * for a single hardware must be synchronized against each other. Calls to
+ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
+ * mixed for a single hardware. Must not run concurrently with
+ * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ *
+ * This function must be called with BHs disabled and RCU read lock
+ *
+ * @hw: the hardware this frame came in on
+ * @sta: the station the frame was received from, or %NULL
+ * @skb: the buffer to receive, owned by mac80211 after this call
+ * @list: the destination list
+ */
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+		       struct sk_buff *skb, struct sk_buff_head *list);
+
 /**
  * ieee80211_rx_napi - receive frame from NAPI context
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 28b154c6e72d..13aad8c3a16f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -218,7 +218,7 @@ enum ieee80211_rx_flags {
 };
 
 struct ieee80211_rx_data {
-	struct napi_struct *napi;
+	struct sk_buff_head *list;
 	struct sk_buff *skb;
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5c5af4b5fc08..0026b58783bd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2578,8 +2578,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
 		memset(skb->cb, 0, sizeof(skb->cb));
 
 		/* deliver to local stack */
-		if (rx->napi)
-			napi_gro_receive(rx->napi, skb);
+		if (rx->list)
+			__skb_queue_tail(rx->list, skb);
 		else
 			netif_receive_skb(skb);
 	}
@@ -3869,7 +3869,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 		/* This is OK -- must be QoS data frame */
 		.security_idx = tid,
 		.seqno_idx = tid,
-		.napi = NULL, /* must be NULL to not have races */
 	};
 	struct tid_ampdu_rx *tid_agg_rx;
 
@@ -4479,8 +4478,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	/* deliver to local stack */
 	skb->protocol = eth_type_trans(skb, fast_rx->dev);
 	memset(skb->cb, 0, sizeof(skb->cb));
-	if (rx->napi)
-		napi_gro_receive(rx->napi, skb);
+	if (rx->list)
+		__skb_queue_tail(rx->list, skb);
 	else
 		netif_receive_skb(skb);
 
@@ -4547,7 +4546,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
 static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 					 struct ieee80211_sta *pubsta,
 					 struct sk_buff *skb,
-					 struct napi_struct *napi)
+					 struct sk_buff_head *list)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
@@ -4562,7 +4561,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	memset(&rx, 0, sizeof(rx));
 	rx.skb = skb;
 	rx.local = local;
-	rx.napi = napi;
+	rx.list = list;
 
 	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
 		I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -4670,8 +4669,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
  * This is the receive path handler. It is called by a low level driver when an
  * 802.11 MPDU is received from the hardware.
  */
-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
-		       struct sk_buff *skb, struct napi_struct *napi)
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+		       struct sk_buff *skb, struct sk_buff_head *list)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate = NULL;
@@ -4762,13 +4761,6 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 
 	status->rx_flags = 0;
 
-	/*
-	 * key references and virtual interfaces are protected using RCU
-	 * and this requires that we are in a read-side RCU section during
-	 * receive processing
-	 */
-	rcu_read_lock();
-
 	/*
 	 * Frames with failed FCS/PLCP checksum are not returned,
 	 * all other frames are returned without radiotap header
@@ -4776,23 +4768,43 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 	 * Also, frames with less than 16 bytes are dropped.
 	 */
 	skb = ieee80211_rx_monitor(local, skb, rate);
-	if (!skb) {
-		rcu_read_unlock();
+	if (!skb)
 		return;
-	}
 
 	ieee80211_tpt_led_trig_rx(local,
 			((struct ieee80211_hdr *)skb->data)->frame_control,
 			skb->len);
 
-	__ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
-
-	rcu_read_unlock();
+	__ieee80211_rx_handle_packet(hw, pubsta, skb, list);
 
 	return;
  drop:
 	kfree_skb(skb);
 }
+EXPORT_SYMBOL(ieee80211_rx_list);
+
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+		       struct sk_buff *skb, struct napi_struct *napi)
+{
+	struct sk_buff_head list;
+
+	__skb_queue_head_init(&list);
+
+	/*
+	 * key references and virtual interfaces are protected using RCU
+	 * and this requires that we are in a read-side RCU section during
+	 * receive processing
+	 */
+	rcu_read_lock();
+	ieee80211_rx_list(hw, pubsta, skb, &list);
+	rcu_read_unlock();
+
+	while ((skb = __skb_dequeue(&list)) != NULL)
+		if (napi)
+			napi_gro_receive(napi, skb);
+		else
+			netif_receive_skb(skb);
+}
 EXPORT_SYMBOL(ieee80211_rx_napi);
 
 /* This is a version of the rx handler that can be called from hard irq
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC] mac80211: add a function for running rx without passing skbs to the stack
  2020-07-25 18:55 [RFC] mac80211: add a function for running rx without passing skbs to the stack Felix Fietkau
@ 2020-07-25 19:14 ` Johannes Berg
  2020-07-29  1:47   ` Rajkumar Manoharan
  0 siblings, 1 reply; 5+ messages in thread
From: Johannes Berg @ 2020-07-25 19:14 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless


> +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
> +		       struct sk_buff *skb, struct napi_struct *napi)
> +{
> +	struct sk_buff_head list;
> +
> +	__skb_queue_head_init(&list);
> +
> +	/*
> +	 * key references and virtual interfaces are protected using RCU
> +	 * and this requires that we are in a read-side RCU section during
> +	 * receive processing
> +	 */
> +	rcu_read_lock();
> +	ieee80211_rx_list(hw, pubsta, skb, &list);
> +	rcu_read_unlock();
> +
> +	while ((skb = __skb_dequeue(&list)) != NULL)

I'd drop the != NULL, but no strong feelings :)

> +		if (napi)
> +			napi_gro_receive(napi, skb);
> +		else
> +			netif_receive_skb(skb);

Nit: I'd prefer braces on the loop, just makes it nicer to read IMHO.

OTOH, the !napi case should use netif_receive_skb_list(), no?

Given the discussion, it also seems a bit odd to add more work for NAPI
poll where we process one by one ... But I see why you did that, and I
guess it's not actually that much more work.

johannes


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] mac80211: add a function for running rx without passing skbs to the stack
  2020-07-25 19:14 ` Johannes Berg
@ 2020-07-29  1:47   ` Rajkumar Manoharan
  2020-07-29  1:56     ` Rajkumar Manoharan
  2020-07-30 14:02     ` Sebastian Gottschall
  0 siblings, 2 replies; 5+ messages in thread
From: Rajkumar Manoharan @ 2020-07-29  1:47 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Felix Fietkau, linux-wireless, linux-wireless-owner

On 2020-07-25 12:14, Johannes Berg wrote:
>> +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta 
>> *pubsta,
>> +		       struct sk_buff *skb, struct napi_struct *napi)
>> +{
>> +	struct sk_buff_head list;
>> +
>> +	__skb_queue_head_init(&list);
>> +
>> +	/*
>> +	 * key references and virtual interfaces are protected using RCU
>> +	 * and this requires that we are in a read-side RCU section during
>> +	 * receive processing
>> +	 */
>> +	rcu_read_lock();
>> +	ieee80211_rx_list(hw, pubsta, skb, &list);
>> +	rcu_read_unlock();
>> +
>> +	while ((skb = __skb_dequeue(&list)) != NULL)
> 
> I'd drop the != NULL, but no strong feelings :)
> 
>> +		if (napi)
>> +			napi_gro_receive(napi, skb);
>> +		else
>> +			netif_receive_skb(skb);
> 
> Nit: I'd prefer braces on the loop, just makes it nicer to read IMHO.
> 
> OTOH, the !napi case should use netif_receive_skb_list(), no?
> 
> Given the discussion, it also seems a bit odd to add more work for NAPI
> poll where we process one by one ... But I see why you did that, and I
> guess it's not actually that much more work.
> 
IIUC Sebastian mentioned that threadable NAPI approach is helping in 
load balancing
with minor check for dummy netdev for wireless drivers. Does this change 
improve
the latency in both threaded and non-threaded modes?

-Rajkumar

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] mac80211: add a function for running rx without passing skbs to the stack
  2020-07-29  1:47   ` Rajkumar Manoharan
@ 2020-07-29  1:56     ` Rajkumar Manoharan
  2020-07-30 14:02     ` Sebastian Gottschall
  1 sibling, 0 replies; 5+ messages in thread
From: Rajkumar Manoharan @ 2020-07-29  1:56 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Felix Fietkau, linux-wireless, linux-wireless-owner

On 2020-07-28 18:47, Rajkumar Manoharan wrote:
> On 2020-07-25 12:14, Johannes Berg wrote:
>>> +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta 
>>> *pubsta,
>>> +		       struct sk_buff *skb, struct napi_struct *napi)
>>> +{
>>> +	struct sk_buff_head list;
>>> +
>>> +	__skb_queue_head_init(&list);
>>> +
>>> +	/*
>>> +	 * key references and virtual interfaces are protected using RCU
>>> +	 * and this requires that we are in a read-side RCU section during
>>> +	 * receive processing
>>> +	 */
>>> +	rcu_read_lock();
>>> +	ieee80211_rx_list(hw, pubsta, skb, &list);
>>> +	rcu_read_unlock();
>>> +
>>> +	while ((skb = __skb_dequeue(&list)) != NULL)
>> 
>> I'd drop the != NULL, but no strong feelings :)
>> 
>>> +		if (napi)
>>> +			napi_gro_receive(napi, skb);
>>> +		else
>>> +			netif_receive_skb(skb);
>> 
>> Nit: I'd prefer braces on the loop, just makes it nicer to read IMHO.
>> 
>> OTOH, the !napi case should use netif_receive_skb_list(), no?
>> 
>> Given the discussion, it also seems a bit odd to add more work for 
>> NAPI
>> poll where we process one by one ... But I see why you did that, and I
>> guess it's not actually that much more work.
>> 
> IIUC Sebastian mentioned that threadable NAPI approach is helping in
> load balancing
> with minor check for dummy netdev for wireless drivers. Does this 
> change improve
> the latency in both threaded and non-threaded modes?
> 
Also I was thinking of introducing threaded irq in athX driver. I was 
concerned about
more threaded levels and intermediate lists might increase sojourn which 
will impact latency. no?

-Rajkumar

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] mac80211: add a function for running rx without passing skbs to the stack
  2020-07-29  1:47   ` Rajkumar Manoharan
  2020-07-29  1:56     ` Rajkumar Manoharan
@ 2020-07-30 14:02     ` Sebastian Gottschall
  1 sibling, 0 replies; 5+ messages in thread
From: Sebastian Gottschall @ 2020-07-30 14:02 UTC (permalink / raw)
  To: Rajkumar Manoharan, Johannes Berg
  Cc: Felix Fietkau, linux-wireless, linux-wireless-owner


Am 29.07.2020 um 03:47 schrieb Rajkumar Manoharan:
> On 2020-07-25 12:14, Johannes Berg wrote:
>>> +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct 
>>> ieee80211_sta *pubsta,
>>> +               struct sk_buff *skb, struct napi_struct *napi)
>>> +{
>>> +    struct sk_buff_head list;
>>> +
>>> +    __skb_queue_head_init(&list);
>>> +
>>> +    /*
>>> +     * key references and virtual interfaces are protected using RCU
>>> +     * and this requires that we are in a read-side RCU section during
>>> +     * receive processing
>>> +     */
>>> +    rcu_read_lock();
>>> +    ieee80211_rx_list(hw, pubsta, skb, &list);
>>> +    rcu_read_unlock();
>>> +
>>> +    while ((skb = __skb_dequeue(&list)) != NULL)
>>
>> I'd drop the != NULL, but no strong feelings :)
>>
>>> +        if (napi)
>>> +            napi_gro_receive(napi, skb);
>>> +        else
>>> +            netif_receive_skb(skb);
>>
>> Nit: I'd prefer braces on the loop, just makes it nicer to read IMHO.
>>
>> OTOH, the !napi case should use netif_receive_skb_list(), no?
>>
>> Given the discussion, it also seems a bit odd to add more work for NAPI
>> poll where we process one by one ... But I see why you did that, and I
>> guess it's not actually that much more work.
>>
> IIUC Sebastian mentioned that threadable NAPI approach is helping in 
> load balancing
> with minor check for dummy netdev for wireless drivers. Does this 
> change improve
> the latency in both threaded and non-threaded modes?
check also for felix's last variant. he made a different patch based on 
workq's
i'm testing this variant now on various platforms including a patched 
ath10k driver
next step would be just to change other wifi drivers for the same 
concept. just a few are using napi at all (mt76, ath10k and iwlwifi)
>
> -Rajkumar
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-07-30 14:02 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-25 18:55 [RFC] mac80211: add a function for running rx without passing skbs to the stack Felix Fietkau
2020-07-25 19:14 ` Johannes Berg
2020-07-29  1:47   ` Rajkumar Manoharan
2020-07-29  1:56     ` Rajkumar Manoharan
2020-07-30 14:02     ` Sebastian Gottschall

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).