linux-wireless.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mac80211: rework locking for txq scheduling / airtime fairness
@ 2019-03-15 10:03 Felix Fietkau
  2019-03-15 13:04 ` Toke Høiland-Jørgensen
  2019-10-10  2:43 ` Yibo Zhao
  0 siblings, 2 replies; 4+ messages in thread
From: Felix Fietkau @ 2019-03-15 10:03 UTC (permalink / raw)
  To: linux-wireless; +Cc: johannes, Toke Høiland-Jørgensen

Holding the lock around the entire duration of tx scheduling can create
some nasty lock contention, especially when processing airtime information
from the tx status or the rx path.
Improve locking by only holding the active_txq_lock for lookups / scheduling
list modifications.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/mac80211.h | 49 ++++++++++++++++--------------------------
 net/mac80211/tx.c      | 44 ++++++++++++++-----------------------
 2 files changed, 35 insertions(+), 58 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3771625b7a9d..0de0aba580eb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6269,8 +6269,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to return packets from.
  *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
  * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
  * is returned, it should be returned with ieee80211_return_txq() after the
  * driver has finished scheduling it.
@@ -6278,51 +6276,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
 
 /**
- * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
- *
- * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
- *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
- */
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
-
-/**
- * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ * ieee80211_txq_schedule_start - start new scheduling round for TXQs
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to acquire locks for
  *
- * Acquire locks needed to schedule TXQs from the given AC. Should be called
- * before ieee80211_next_txq() or ieee80211_return_txq().
+ * Should be called before ieee80211_next_txq() or ieee80211_return_txq().
+ * The driver must not call multiple TXQ scheduling rounds concurrently.
  */
-void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
-	__acquires(txq_lock);
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
+
+/* (deprecated) */
+static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+{
+}
 
 /**
- * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @ac: AC number to acquire locks for
+ * @txq: pointer obtained from station or virtual interface
  *
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Schedules a TXQ for transmission if it is not already scheduled.
  */
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
-	__releases(txq_lock);
+void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
 
 /**
- * ieee80211_schedule_txq - schedule a TXQ for transmission
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @txq: pointer obtained from station or virtual interface
- *
- * Schedules a TXQ for transmission if it is not already scheduled. Takes a
- * lock, which means it must *not* be called between
- * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
  */
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
-	__acquires(txq_lock) __releases(txq_lock);
+static inline void
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+	ieee80211_schedule_txq(hw, txq);
+}
 
 /**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ca23abbf5c0b..51cc37802439 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3653,16 +3653,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_txq *ret = NULL;
 	struct txq_info *txqi = NULL;
 
-	lockdep_assert_held(&local->active_txq_lock[ac]);
+	spin_lock_bh(&local->active_txq_lock[ac]);
 
  begin:
 	txqi = list_first_entry_or_null(&local->active_txqs[ac],
 					struct txq_info,
 					schedule_order);
 	if (!txqi)
-		return NULL;
+		goto out;
 
 	if (txqi->txq.sta) {
 		struct sta_info *sta = container_of(txqi->txq.sta,
@@ -3679,21 +3680,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 
 
 	if (txqi->schedule_round == local->schedule_round[ac])
-		return NULL;
+		goto out;
 
 	list_del_init(&txqi->schedule_order);
 	txqi->schedule_round = local->schedule_round[ac];
-	return &txqi->txq;
+	ret = &txqi->txq;
+
+out:
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+	return ret;
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void ieee80211_return_txq(struct ieee80211_hw *hw,
-			  struct ieee80211_txq *txq)
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
 
-	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+	spin_lock_bh(&local->active_txq_lock[txq->ac]);
 
 	if (list_empty(&txqi->schedule_order) &&
 	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
@@ -3713,17 +3718,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 			list_add_tail(&txqi->schedule_order,
 				      &local->active_txqs[txq->ac]);
 	}
-}
-EXPORT_SYMBOL(ieee80211_return_txq);
 
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			    struct ieee80211_txq *txq)
-	__acquires(txq_lock) __releases(txq_lock)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-
-	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	ieee80211_return_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
 EXPORT_SYMBOL(ieee80211_schedule_txq);
@@ -3736,7 +3731,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 	struct sta_info *sta;
 	u8 ac = txq->ac;
 
-	lockdep_assert_held(&local->active_txq_lock[ac]);
+	spin_lock_bh(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
 		goto out;
@@ -3766,34 +3761,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 
 	sta->airtime[ac].deficit += sta->airtime_weight;
 	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
 
 	return false;
 out:
 	if (!list_empty(&txqi->schedule_order))
 		list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock[ac]);
 
 	return true;
 }
 EXPORT_SYMBOL(ieee80211_txq_may_transmit);
 
 void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
-	__acquires(txq_lock)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 	local->schedule_round[ac]++;
-}
-EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
-	__releases(txq_lock)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
-EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
-- 
2.17.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] mac80211: rework locking for txq scheduling / airtime fairness
  2019-03-15 10:03 [PATCH] mac80211: rework locking for txq scheduling / airtime fairness Felix Fietkau
@ 2019-03-15 13:04 ` Toke Høiland-Jørgensen
  2019-10-10  2:43 ` Yibo Zhao
  1 sibling, 0 replies; 4+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-03-15 13:04 UTC (permalink / raw)
  To: Felix Fietkau, linux-wireless; +Cc: johannes

Felix Fietkau <nbd@nbd.name> writes:

> Holding the lock around the entire duration of tx scheduling can create
> some nasty lock contention, especially when processing airtime information
> from the tx status or the rx path.
> Improve locking by only holding the active_txq_lock for lookups / scheduling
> list modifications.
>
> Signed-off-by: Felix Fietkau <nbd@nbd.name>

Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>

>  include/net/mac80211.h | 49 ++++++++++++++++--------------------------
>  net/mac80211/tx.c      | 44 ++++++++++++++-----------------------
>  2 files changed, 35 insertions(+), 58 deletions(-)
>
> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
> index 3771625b7a9d..0de0aba580eb 100644
> --- a/include/net/mac80211.h
> +++ b/include/net/mac80211.h

[ ... ]
> -void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)

> -	__acquires(txq_lock);
> +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
> +
> +/* (deprecated) */
> +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
> +{
> +}

I figure I'll post a cleanup of this as part of my reworked schedule
change patch; since I'll be messing around with these bits anyway...

-Toke

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mac80211: rework locking for txq scheduling / airtime fairness
  2019-03-15 10:03 [PATCH] mac80211: rework locking for txq scheduling / airtime fairness Felix Fietkau
  2019-03-15 13:04 ` Toke Høiland-Jørgensen
@ 2019-10-10  2:43 ` Yibo Zhao
  2019-10-10  5:56   ` Felix Fietkau
  1 sibling, 1 reply; 4+ messages in thread
From: Yibo Zhao @ 2019-10-10  2:43 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: linux-wireless, johannes, Toke Høiland-Jørgensen,
	linux-wireless-owner

> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index ca23abbf5c0b..51cc37802439 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -3653,16 +3653,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
>  struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 
> ac)
>  {
>  	struct ieee80211_local *local = hw_to_local(hw);
> +	struct ieee80211_txq *ret = NULL;
>  	struct txq_info *txqi = NULL;
> 
> -	lockdep_assert_held(&local->active_txq_lock[ac]);
> +	spin_lock_bh(&local->active_txq_lock[ac]);
> 
>   begin:
>  	txqi = list_first_entry_or_null(&local->active_txqs[ac],
>  					struct txq_info,
>  					schedule_order);
>  	if (!txqi)
> -		return NULL;
> +		goto out;
> 
>  	if (txqi->txq.sta) {
>  		struct sta_info *sta = container_of(txqi->txq.sta,
> @@ -3679,21 +3680,25 @@ struct ieee80211_txq
> *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
> 
> 
>  	if (txqi->schedule_round == local->schedule_round[ac])
> -		return NULL;
> +		goto out;
> 
>  	list_del_init(&txqi->schedule_order);
>  	txqi->schedule_round = local->schedule_round[ac];
> -	return &txqi->txq;
> +	ret = &txqi->txq;
> +
> +out:
> +	spin_unlock_bh(&local->active_txq_lock[ac]);
> +	return ret;
>  }
>  EXPORT_SYMBOL(ieee80211_next_txq);
> 
> -void ieee80211_return_txq(struct ieee80211_hw *hw,
> -			  struct ieee80211_txq *txq)
> +void ieee80211_schedule_txq(struct ieee80211_hw *hw,
> +			    struct ieee80211_txq *txq)
>  {
>  	struct ieee80211_local *local = hw_to_local(hw);
>  	struct txq_info *txqi = to_txq_info(txq);
> 
> -	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
> +	spin_lock_bh(&local->active_txq_lock[txq->ac]);
> 
>  	if (list_empty(&txqi->schedule_order) &&
>  	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
> @@ -3713,17 +3718,7 @@ void ieee80211_return_txq(struct ieee80211_hw 
> *hw,
>  			list_add_tail(&txqi->schedule_order,
>  				      &local->active_txqs[txq->ac]);
>  	}
> -}
> -EXPORT_SYMBOL(ieee80211_return_txq);
> 
> -void ieee80211_schedule_txq(struct ieee80211_hw *hw,
> -			    struct ieee80211_txq *txq)
> -	__acquires(txq_lock) __releases(txq_lock)
> -{
> -	struct ieee80211_local *local = hw_to_local(hw);
> -
> -	spin_lock_bh(&local->active_txq_lock[txq->ac]);
> -	ieee80211_return_txq(hw, txq);
>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>  }
>  EXPORT_SYMBOL(ieee80211_schedule_txq);
> @@ -3736,7 +3731,7 @@ bool ieee80211_txq_may_transmit(struct 
> ieee80211_hw *hw,
>  	struct sta_info *sta;
>  	u8 ac = txq->ac;
> 
> -	lockdep_assert_held(&local->active_txq_lock[ac]);
> +	spin_lock_bh(&local->active_txq_lock[ac]);
> 
>  	if (!txqi->txq.sta)
>  		goto out;
> @@ -3766,34 +3761,27 @@ bool ieee80211_txq_may_transmit(struct 
> ieee80211_hw *hw,
> 
>  	sta->airtime[ac].deficit += sta->airtime_weight;
>  	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
> +	spin_unlock_bh(&local->active_txq_lock[ac]);
> 
>  	return false;
>  out:
>  	if (!list_empty(&txqi->schedule_order))
>  		list_del_init(&txqi->schedule_order);
> +	spin_unlock_bh(&local->active_txq_lock[ac]);
> 
>  	return true;
>  }
>  EXPORT_SYMBOL(ieee80211_txq_may_transmit);
> 
>  void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
> -	__acquires(txq_lock)
>  {
>  	struct ieee80211_local *local = hw_to_local(hw);
> 
>  	spin_lock_bh(&local->active_txq_lock[ac]);
>  	local->schedule_round[ac]++;
Hi Felix,

In ath10k, we might have situations like,

           CPU0                           CPU1
...                                   ieee80211_txq_schedule_start()
...                                   while(ieee80211_next_txq()) {
...                                       driver dequeue packets from 
txq
...                                       ieee80211_return_txq()
ieee80211_txq_schedule_start()            ...
ieee80211_next_txq()                  }
driver dequeue packets from txq       ieee80211_txq_schedule_end()
ieee80211_return_txq()
ieee80211_txq_schedule_end()

The problem is while CPU1 is looping to dequeue txqs, the schedule_round 
which is used to make sure no infinite loop will be changed in CPU0. So 
a txq has already been scheduled will be scheduled again then the loop 
won't be end at that point as our expected.

> -}
> -EXPORT_SYMBOL(ieee80211_txq_schedule_start);
> -
> -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
> -	__releases(txq_lock)
> -{
> -	struct ieee80211_local *local = hw_to_local(hw);
> -
>  	spin_unlock_bh(&local->active_txq_lock[ac]);
>  }
> -EXPORT_SYMBOL(ieee80211_txq_schedule_end);
> +EXPORT_SYMBOL(ieee80211_txq_schedule_start);
> 
>  void __ieee80211_subif_start_xmit(struct sk_buff *skb,
>  				  struct net_device *dev,

-- 
Yibo

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mac80211: rework locking for txq scheduling / airtime fairness
  2019-10-10  2:43 ` Yibo Zhao
@ 2019-10-10  5:56   ` Felix Fietkau
  0 siblings, 0 replies; 4+ messages in thread
From: Felix Fietkau @ 2019-10-10  5:56 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless, johannes, Toke Høiland-Jørgensen

On 2019-10-10 04:43, Yibo Zhao wrote:
> Hi Felix,
> 
> In ath10k, we might have situations like,
> 
>            CPU0                           CPU1
> ...                                   ieee80211_txq_schedule_start()
> ...                                   while(ieee80211_next_txq()) {
> ...                                       driver dequeue packets from 
> txq
> ...                                       ieee80211_return_txq()
> ieee80211_txq_schedule_start()            ...
> ieee80211_next_txq()                  }
> driver dequeue packets from txq       ieee80211_txq_schedule_end()
> ieee80211_return_txq()
> ieee80211_txq_schedule_end()
> 
> The problem is while CPU1 is looping to dequeue txqs, the schedule_round 
> which is used to make sure no infinite loop will be changed in CPU0. So 
> a txq has already been scheduled will be scheduled again then the loop 
> won't be end at that point as our expected.
Hi,

I think this is something that should be fixed in the driver. The
comment for ieee80211_txq_schedule_start() already states:
"The driver must not call multiple TXQ scheduling rounds concurrently"

The way that I'm dealing with this in mt76 is that I ensure that all txq
scheduling is done from a single tasklet, which is scheduled from the tx
status and the queue wake path.

I think that approach should perform well in ath10k as well, and it will
avoid the cost of waiting for a lock on one CPU while the other is
scheduling tx already.

- Felix

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-10-10  5:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-15 10:03 [PATCH] mac80211: rework locking for txq scheduling / airtime fairness Felix Fietkau
2019-03-15 13:04 ` Toke Høiland-Jørgensen
2019-10-10  2:43 ` Yibo Zhao
2019-10-10  5:56   ` Felix Fietkau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).