All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
@ 2019-09-16 13:09 ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Toke Høiland-Jørgensen

From: Toke Høiland-Jørgensen <toke@redhat.com>

This switches the airtime scheduler in mac80211 to use a virtual time-based
scheduler instead of the round-robin scheduler used before. This has a
couple of advantages:

- No need to sync up the round-robin scheduler in firmware/hardware with
  the round-robin airtime scheduler.

- If several stations are eligible for transmission we can schedule all of
  them; no need to hard-block the scheduling rotation until the head of the
  queue has used up its quantum.

- The check of whether a station is eligible for transmission becomes
  simpler (in ieee80211_txq_may_transmit()).

The drawback is that scheduling becomes slightly more expensive, as we need
to maintain an rbtree of TXQs sorted by virtual time. This means that
ieee80211_sta_register_airtime() becomes O(logN) in the number of currently
scheduled TXQs. However, hopefully this number rarely grows too big (it's
only TXQs currently backlogged, not all associated stations), so it
shouldn't be too big of an issue.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 net/mac80211/debugfs.c     |  48 +++++++++-
 net/mac80211/debugfs_sta.c |  16 ++--
 net/mac80211/ieee80211_i.h |  14 ++-
 net/mac80211/main.c        |   2 +-
 net/mac80211/sta_info.c    |  19 +++-
 net/mac80211/sta_info.h    |   3 +-
 net/mac80211/tx.c          | 217 +++++++++++++++++++++++++++++----------------
 7 files changed, 223 insertions(+), 96 deletions(-)

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2d43bc1..4847168 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -150,6 +150,46 @@ static ssize_t aqm_write(struct file *file,
 	.llseek = default_llseek,
 };
 
+static ssize_t airtime_read(struct file *file,
+			    char __user *user_buf,
+			    size_t count,
+			    loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[200];
+	u64 v_t[IEEE80211_NUM_ACS];
+	u64 wt[IEEE80211_NUM_ACS];
+	int len = 0, ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		v_t[ac] = local->airtime_v_t[ac];
+		wt[ac] = local->airtime_weight_sum[ac];
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+	len = scnprintf(buf, sizeof(buf),
+			"\tVO         VI         BE         BK\n"
+			"Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
+			"Weight\t%-10llu %-10llu %-10llu %-10llu\n",
+			v_t[0],
+			v_t[1],
+			v_t[2],
+			v_t[3],
+			wt[0],
+			wt[1],
+			wt[2],
+			wt[3]);
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       buf, len);
+}
+
+static const struct file_operations airtime_ops = {
+	.read = airtime_read,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 #ifdef CONFIG_PM
 static ssize_t reset_write(struct file *file, const char __user *user_buf,
 			   size_t count, loff_t *ppos)
@@ -386,8 +426,12 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
-	debugfs_create_u16("airtime_flags", 0600,
-			   phyd, &local->airtime_flags);
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+		DEBUGFS_ADD_MODE(airtime, 0600);
+		debugfs_create_u16("airtime_flags", 0600,
+				   phyd, &local->airtime_flags);
+	}
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 3aa618d..80028da 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -203,7 +203,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 	size_t bufsz = 200;
 	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
 	u64 rx_airtime = 0, tx_airtime = 0;
-	s64 deficit[IEEE80211_NUM_ACS];
+	u64 v_t[IEEE80211_NUM_ACS];
 	ssize_t rv;
 	int ac;
 
@@ -214,20 +214,20 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 		spin_lock_bh(&local->active_txq_lock[ac]);
 		rx_airtime += sta->airtime[ac].rx_airtime;
 		tx_airtime += sta->airtime[ac].tx_airtime;
-		deficit[ac] = sta->airtime[ac].deficit;
+		v_t[ac] = sta->airtime[ac].v_t;
 		spin_unlock_bh(&local->active_txq_lock[ac]);
 	}
 
 	p += scnprintf(p, bufsz + buf - p,
 		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
-		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		"Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
 		rx_airtime,
 		tx_airtime,
 		sta->airtime_weight,
-		deficit[0],
-		deficit[1],
-		deficit[2],
-		deficit[3]);
+		v_t[0],
+		v_t[1],
+		v_t[2],
+		v_t[3]);
 
 	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 	kfree(buf);
@@ -245,7 +245,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
 		spin_lock_bh(&local->active_txq_lock[ac]);
 		sta->airtime[ac].rx_airtime = 0;
 		sta->airtime[ac].tx_airtime = 0;
-		sta->airtime[ac].deficit = sta->airtime_weight;
+		sta->airtime[ac].v_t = 0;
 		spin_unlock_bh(&local->active_txq_lock[ac]);
 	}
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e170f98..a4556f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -846,8 +846,7 @@ struct txq_info {
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
-	struct list_head schedule_order;
-	u16 schedule_round;
+	struct rb_node schedule_order;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1141,8 +1140,10 @@ struct ieee80211_local {
 
 	/* protects active_txqs and txqi->schedule_order */
 	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
-	struct list_head active_txqs[IEEE80211_NUM_ACS];
-	u16 schedule_round[IEEE80211_NUM_ACS];
+	struct rb_root_cached active_txqs[IEEE80211_NUM_ACS];
+	struct rb_node *schedule_pos[IEEE80211_NUM_ACS];
+	u64 airtime_v_t[IEEE80211_NUM_ACS];
+	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
 
 	u16 airtime_flags;
 
@@ -1779,6 +1780,11 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len,
 			      const u8 *dest, __be16 proto, bool unencrypted);
 
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq);
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq);
+
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5055aeb..e9ffa8e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -666,7 +666,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	spin_lock_init(&local->queue_stop_reason_lock);
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		INIT_LIST_HEAD(&local->active_txqs[i]);
+		local->active_txqs[i] = RB_ROOT_CACHED;
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
 	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 11f0589..9d01fdd 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -389,7 +389,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
-		sta->airtime[i].deficit = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1831,18 +1830,32 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_local *local = sta->sdata->local;
+	struct ieee80211_txq *txq = sta->sta.txq[tid];
 	u8 ac = ieee80211_ac_from_tid(tid);
-	u32 airtime = 0;
+	u64 airtime = 0, weight_sum;
+
+	if (!txq)
+		return;
 
 	if (sta->local->airtime_flags & AIRTIME_USE_TX)
 		airtime += tx_airtime;
 	if (sta->local->airtime_flags & AIRTIME_USE_RX)
 		airtime += rx_airtime;
 
+	/* Weights scale so the unit weight is 256 */
+	airtime <<= 8;
+
 	spin_lock_bh(&local->active_txq_lock[ac]);
+
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
-	sta->airtime[ac].deficit -= airtime;
+
+	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+
+	local->airtime_v_t[ac] += airtime / weight_sum;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	ieee80211_resort_txq(&local->hw, txq);
+
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_sta_register_airtime);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 71f7e49..5c1cac9 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -130,11 +130,12 @@ enum ieee80211_agg_stop_reason {
 /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
 #define AIRTIME_USE_TX		BIT(0)
 #define AIRTIME_USE_RX		BIT(1)
+#define AIRTIME_GRACE 500 /* usec of grace period before reset */
 
 struct airtime_info {
 	u64 rx_airtime;
 	u64 tx_airtime;
-	s64 deficit;
+	u64 v_t;
 };
 
 struct sta_info;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 61c7ea9..d00baaa 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1449,7 +1449,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
-	INIT_LIST_HEAD(&txqi->schedule_order);
+	RB_CLEAR_NODE(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1493,9 +1493,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 	spin_unlock_bh(&fq->lock);
 
-	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
-	list_del_init(&txqi->schedule_order);
-	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	ieee80211_unschedule_txq(&local->hw, &txqi->txq);
 }
 
 void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -3640,126 +3638,191 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct rb_node *node = local->schedule_pos[ac];
 	struct txq_info *txqi = NULL;
+	bool first = false;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
- begin:
-	txqi = list_first_entry_or_null(&local->active_txqs[ac],
-					struct txq_info,
-					schedule_order);
-	if (!txqi)
+	if (!node) {
+		node = rb_first_cached(&local->active_txqs[ac]);
+		first = true;
+	} else {
+		node = rb_next(node);
+	}
+
+	if (!node)
 		return NULL;
 
+	txqi = container_of(node, struct txq_info, schedule_order);
+
 	if (txqi->txq.sta) {
 		struct sta_info *sta = container_of(txqi->txq.sta,
 						struct sta_info, sta);
 
-		if (sta->airtime[txqi->txq.ac].deficit < 0) {
-			sta->airtime[txqi->txq.ac].deficit +=
-				sta->airtime_weight;
-			list_move_tail(&txqi->schedule_order,
-				       &local->active_txqs[txqi->txq.ac]);
-			goto begin;
+		if (sta->airtime[ac].v_t > local->airtime_v_t[ac]) {
+			if (first)
+				local->airtime_v_t[ac] = sta->airtime[ac].v_t;
+			else
+				return NULL;
 		}
 	}
 
 
-	if (txqi->schedule_round == local->schedule_round[ac])
-		return NULL;
-
-	list_del_init(&txqi->schedule_order);
-	txqi->schedule_round = local->schedule_round[ac];
+	local->schedule_pos[ac] = node;
 	return &txqi->txq;
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void ieee80211_return_txq(struct ieee80211_hw *hw,
+static void __ieee80211_insert_txq(struct rb_root_cached *root,
+				   struct txq_info *txqi, u8 ac)
+{
+	struct rb_node **new = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct txq_info *__txqi;
+	bool leftmost = true;
+
+	while (*new) {
+		parent = *new;
+		__txqi = rb_entry(parent, struct txq_info, schedule_order);
+
+		if (!txqi->txq.sta) {
+			/* new txqi has no sta - insert to the left */
+			new = &parent->rb_left;
+		} else if (!__txqi->txq.sta) {
+			/* existing txqi has no sta - insert to the right */
+			new = &parent->rb_right;
+			leftmost = false;
+		} else {
+			struct sta_info *old_sta = container_of(__txqi->txq.sta,
+								struct sta_info,
+								sta);
+			struct sta_info *new_sta = container_of(txqi->txq.sta,
+								struct sta_info,
+								sta);
+
+			if (new_sta->airtime[ac].v_t <= old_sta->airtime[ac].v_t) {
+				new = &parent->rb_left;
+			} else {
+				new = &parent->rb_right;
+				leftmost = false;
+			}
+		}
+	}
+
+	rb_link_node(&txqi->schedule_order, parent, new);
+	rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+}
+
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
+	__acquires(txq_lock) __releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order))
+		goto out;
+
+	if (txq->sta) {
+		struct sta_info *sta = container_of(txq->sta,
+						    struct sta_info, sta);
+
+		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
+			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
+						   sta->airtime[ac].v_t);
+	}
+
+	__ieee80211_insert_txq(&local->active_txqs[ac], txqi, ac);
+
+ out:
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
 			  struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+		rb_erase_cached(&txqi->schedule_order,
+				&local->active_txqs[ac]);
+		RB_CLEAR_NODE(&txqi->schedule_order);
+		__ieee80211_insert_txq(&local->active_txqs[ac], txqi, ac);
+	}
+}
+
+static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+				       struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
 
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
-	if (list_empty(&txqi->schedule_order) &&
-	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
-		/* If airtime accounting is active, always enqueue STAs at the
-		 * head of the list to ensure that they only get moved to the
-		 * back by the airtime DRR scheduler once they have a negative
-		 * deficit. A station that already has a negative deficit will
-		 * get immediately moved to the back of the list on the next
-		 * call to ieee80211_next_txq().
-		 */
-		if (txqi->txq.sta &&
-		    wiphy_ext_feature_isset(local->hw.wiphy,
-					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
-			list_add(&txqi->schedule_order,
-				 &local->active_txqs[txq->ac]);
-		else
-			list_add_tail(&txqi->schedule_order,
-				      &local->active_txqs[txq->ac]);
+	if (RB_EMPTY_NODE(&txqi->schedule_order))
+		return;
+
+	if (txq->sta) {
+		struct sta_info *sta = container_of(txq->sta,
+						    struct sta_info, sta);
+
+		local->airtime_weight_sum[ac] -= sta->airtime_weight;
 	}
+
+	rb_erase_cached(&txqi->schedule_order,
+			&local->active_txqs[txq->ac]);
+	RB_CLEAR_NODE(&txqi->schedule_order);
 }
-EXPORT_SYMBOL(ieee80211_return_txq);
 
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			    struct ieee80211_txq *txq)
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	ieee80211_return_txq(hw, txq);
+	__ieee80211_unschedule_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
+	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
+		__ieee80211_unschedule_txq(hw, txq);
+}
+EXPORT_SYMBOL(ieee80211_return_txq);
 
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+	struct txq_info *txqi = to_txq_info(txq);
 	struct sta_info *sta;
 	u8 ac = txq->ac;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
-		goto out;
-
-	if (list_empty(&txqi->schedule_order))
-		goto out;
-
-	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
-				 schedule_order) {
-		if (iter == txqi)
-			break;
-
-		if (!iter->txq.sta) {
-			list_move_tail(&iter->schedule_order,
-				       &local->active_txqs[ac]);
-			continue;
-		}
-		sta = container_of(iter->txq.sta, struct sta_info, sta);
-		if (sta->airtime[ac].deficit < 0)
-			sta->airtime[ac].deficit += sta->airtime_weight;
-		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
-	}
+		return true;
 
 	sta = container_of(txqi->txq.sta, struct sta_info, sta);
-	if (sta->airtime[ac].deficit >= 0)
-		goto out;
-
-	sta->airtime[ac].deficit += sta->airtime_weight;
-	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
-
-	return false;
-out:
-	if (!list_empty(&txqi->schedule_order))
-		list_del_init(&txqi->schedule_order);
-
-	return true;
+	return (sta->airtime[ac].v_t <= local->airtime_v_t[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_may_transmit);
 
@@ -3769,7 +3832,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
-	local->schedule_round[ac]++;
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
@@ -3778,6 +3840,7 @@ void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
+	local->schedule_pos[ac] = NULL;
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_end);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
@ 2019-09-16 13:09 ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: Toke Høiland-Jørgensen, linux-wireless

From: Toke Høiland-Jørgensen <toke@redhat.com>

This switches the airtime scheduler in mac80211 to use a virtual time-based
scheduler instead of the round-robin scheduler used before. This has a
couple of advantages:

- No need to sync up the round-robin scheduler in firmware/hardware with
  the round-robin airtime scheduler.

- If several stations are eligible for transmission we can schedule all of
  them; no need to hard-block the scheduling rotation until the head of the
  queue has used up its quantum.

- The check of whether a station is eligible for transmission becomes
  simpler (in ieee80211_txq_may_transmit()).

The drawback is that scheduling becomes slightly more expensive, as we need
to maintain an rbtree of TXQs sorted by virtual time. This means that
ieee80211_sta_register_airtime() becomes O(logN) in the number of currently
scheduled TXQs. However, hopefully this number rarely grows too big (it's
only TXQs currently backlogged, not all associated stations), so it
shouldn't be too big of an issue.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 net/mac80211/debugfs.c     |  48 +++++++++-
 net/mac80211/debugfs_sta.c |  16 ++--
 net/mac80211/ieee80211_i.h |  14 ++-
 net/mac80211/main.c        |   2 +-
 net/mac80211/sta_info.c    |  19 +++-
 net/mac80211/sta_info.h    |   3 +-
 net/mac80211/tx.c          | 217 +++++++++++++++++++++++++++++----------------
 7 files changed, 223 insertions(+), 96 deletions(-)

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2d43bc1..4847168 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -150,6 +150,46 @@ static ssize_t aqm_write(struct file *file,
 	.llseek = default_llseek,
 };
 
+static ssize_t airtime_read(struct file *file,
+			    char __user *user_buf,
+			    size_t count,
+			    loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[200];
+	u64 v_t[IEEE80211_NUM_ACS];
+	u64 wt[IEEE80211_NUM_ACS];
+	int len = 0, ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		v_t[ac] = local->airtime_v_t[ac];
+		wt[ac] = local->airtime_weight_sum[ac];
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+	len = scnprintf(buf, sizeof(buf),
+			"\tVO         VI         BE         BK\n"
+			"Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
+			"Weight\t%-10llu %-10llu %-10llu %-10llu\n",
+			v_t[0],
+			v_t[1],
+			v_t[2],
+			v_t[3],
+			wt[0],
+			wt[1],
+			wt[2],
+			wt[3]);
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       buf, len);
+}
+
+static const struct file_operations airtime_ops = {
+	.read = airtime_read,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 #ifdef CONFIG_PM
 static ssize_t reset_write(struct file *file, const char __user *user_buf,
 			   size_t count, loff_t *ppos)
@@ -386,8 +426,12 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
-	debugfs_create_u16("airtime_flags", 0600,
-			   phyd, &local->airtime_flags);
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+		DEBUGFS_ADD_MODE(airtime, 0600);
+		debugfs_create_u16("airtime_flags", 0600,
+				   phyd, &local->airtime_flags);
+	}
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 3aa618d..80028da 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -203,7 +203,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 	size_t bufsz = 200;
 	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
 	u64 rx_airtime = 0, tx_airtime = 0;
-	s64 deficit[IEEE80211_NUM_ACS];
+	u64 v_t[IEEE80211_NUM_ACS];
 	ssize_t rv;
 	int ac;
 
@@ -214,20 +214,20 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 		spin_lock_bh(&local->active_txq_lock[ac]);
 		rx_airtime += sta->airtime[ac].rx_airtime;
 		tx_airtime += sta->airtime[ac].tx_airtime;
-		deficit[ac] = sta->airtime[ac].deficit;
+		v_t[ac] = sta->airtime[ac].v_t;
 		spin_unlock_bh(&local->active_txq_lock[ac]);
 	}
 
 	p += scnprintf(p, bufsz + buf - p,
 		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
-		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		"Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
 		rx_airtime,
 		tx_airtime,
 		sta->airtime_weight,
-		deficit[0],
-		deficit[1],
-		deficit[2],
-		deficit[3]);
+		v_t[0],
+		v_t[1],
+		v_t[2],
+		v_t[3]);
 
 	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 	kfree(buf);
@@ -245,7 +245,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
 		spin_lock_bh(&local->active_txq_lock[ac]);
 		sta->airtime[ac].rx_airtime = 0;
 		sta->airtime[ac].tx_airtime = 0;
-		sta->airtime[ac].deficit = sta->airtime_weight;
+		sta->airtime[ac].v_t = 0;
 		spin_unlock_bh(&local->active_txq_lock[ac]);
 	}
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e170f98..a4556f9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -846,8 +846,7 @@ struct txq_info {
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
-	struct list_head schedule_order;
-	u16 schedule_round;
+	struct rb_node schedule_order;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1141,8 +1140,10 @@ struct ieee80211_local {
 
 	/* protects active_txqs and txqi->schedule_order */
 	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
-	struct list_head active_txqs[IEEE80211_NUM_ACS];
-	u16 schedule_round[IEEE80211_NUM_ACS];
+	struct rb_root_cached active_txqs[IEEE80211_NUM_ACS];
+	struct rb_node *schedule_pos[IEEE80211_NUM_ACS];
+	u64 airtime_v_t[IEEE80211_NUM_ACS];
+	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
 
 	u16 airtime_flags;
 
@@ -1779,6 +1780,11 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len,
 			      const u8 *dest, __be16 proto, bool unencrypted);
 
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq);
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq);
+
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5055aeb..e9ffa8e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -666,7 +666,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	spin_lock_init(&local->queue_stop_reason_lock);
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		INIT_LIST_HEAD(&local->active_txqs[i]);
+		local->active_txqs[i] = RB_ROOT_CACHED;
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
 	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 11f0589..9d01fdd 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -389,7 +389,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
-		sta->airtime[i].deficit = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1831,18 +1830,32 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_local *local = sta->sdata->local;
+	struct ieee80211_txq *txq = sta->sta.txq[tid];
 	u8 ac = ieee80211_ac_from_tid(tid);
-	u32 airtime = 0;
+	u64 airtime = 0, weight_sum;
+
+	if (!txq)
+		return;
 
 	if (sta->local->airtime_flags & AIRTIME_USE_TX)
 		airtime += tx_airtime;
 	if (sta->local->airtime_flags & AIRTIME_USE_RX)
 		airtime += rx_airtime;
 
+	/* Weights scale so the unit weight is 256 */
+	airtime <<= 8;
+
 	spin_lock_bh(&local->active_txq_lock[ac]);
+
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
-	sta->airtime[ac].deficit -= airtime;
+
+	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+
+	local->airtime_v_t[ac] += airtime / weight_sum;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	ieee80211_resort_txq(&local->hw, txq);
+
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_sta_register_airtime);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 71f7e49..5c1cac9 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -130,11 +130,12 @@ enum ieee80211_agg_stop_reason {
 /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
 #define AIRTIME_USE_TX		BIT(0)
 #define AIRTIME_USE_RX		BIT(1)
+#define AIRTIME_GRACE 500 /* usec of grace period before reset */
 
 struct airtime_info {
 	u64 rx_airtime;
 	u64 tx_airtime;
-	s64 deficit;
+	u64 v_t;
 };
 
 struct sta_info;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 61c7ea9..d00baaa 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1449,7 +1449,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
-	INIT_LIST_HEAD(&txqi->schedule_order);
+	RB_CLEAR_NODE(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1493,9 +1493,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 	spin_unlock_bh(&fq->lock);
 
-	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
-	list_del_init(&txqi->schedule_order);
-	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	ieee80211_unschedule_txq(&local->hw, &txqi->txq);
 }
 
 void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -3640,126 +3638,191 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct rb_node *node = local->schedule_pos[ac];
 	struct txq_info *txqi = NULL;
+	bool first = false;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
- begin:
-	txqi = list_first_entry_or_null(&local->active_txqs[ac],
-					struct txq_info,
-					schedule_order);
-	if (!txqi)
+	if (!node) {
+		node = rb_first_cached(&local->active_txqs[ac]);
+		first = true;
+	} else {
+		node = rb_next(node);
+	}
+
+	if (!node)
 		return NULL;
 
+	txqi = container_of(node, struct txq_info, schedule_order);
+
 	if (txqi->txq.sta) {
 		struct sta_info *sta = container_of(txqi->txq.sta,
 						struct sta_info, sta);
 
-		if (sta->airtime[txqi->txq.ac].deficit < 0) {
-			sta->airtime[txqi->txq.ac].deficit +=
-				sta->airtime_weight;
-			list_move_tail(&txqi->schedule_order,
-				       &local->active_txqs[txqi->txq.ac]);
-			goto begin;
+		if (sta->airtime[ac].v_t > local->airtime_v_t[ac]) {
+			if (first)
+				local->airtime_v_t[ac] = sta->airtime[ac].v_t;
+			else
+				return NULL;
 		}
 	}
 
 
-	if (txqi->schedule_round == local->schedule_round[ac])
-		return NULL;
-
-	list_del_init(&txqi->schedule_order);
-	txqi->schedule_round = local->schedule_round[ac];
+	local->schedule_pos[ac] = node;
 	return &txqi->txq;
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void ieee80211_return_txq(struct ieee80211_hw *hw,
+static void __ieee80211_insert_txq(struct rb_root_cached *root,
+				   struct txq_info *txqi, u8 ac)
+{
+	struct rb_node **new = &root->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct txq_info *__txqi;
+	bool leftmost = true;
+
+	while (*new) {
+		parent = *new;
+		__txqi = rb_entry(parent, struct txq_info, schedule_order);
+
+		if (!txqi->txq.sta) {
+			/* new txqi has no sta - insert to the left */
+			new = &parent->rb_left;
+		} else if (!__txqi->txq.sta) {
+			/* existing txqi has no sta - insert to the right */
+			new = &parent->rb_right;
+			leftmost = false;
+		} else {
+			struct sta_info *old_sta = container_of(__txqi->txq.sta,
+								struct sta_info,
+								sta);
+			struct sta_info *new_sta = container_of(txqi->txq.sta,
+								struct sta_info,
+								sta);
+
+			if (new_sta->airtime[ac].v_t <= old_sta->airtime[ac].v_t) {
+				new = &parent->rb_left;
+			} else {
+				new = &parent->rb_right;
+				leftmost = false;
+			}
+		}
+	}
+
+	rb_link_node(&txqi->schedule_order, parent, new);
+	rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+}
+
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
+	__acquires(txq_lock) __releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order))
+		goto out;
+
+	if (txq->sta) {
+		struct sta_info *sta = container_of(txq->sta,
+						    struct sta_info, sta);
+
+		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
+			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
+						   sta->airtime[ac].v_t);
+	}
+
+	__ieee80211_insert_txq(&local->active_txqs[ac], txqi, ac);
+
+ out:
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
 			  struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+		rb_erase_cached(&txqi->schedule_order,
+				&local->active_txqs[ac]);
+		RB_CLEAR_NODE(&txqi->schedule_order);
+		__ieee80211_insert_txq(&local->active_txqs[ac], txqi, ac);
+	}
+}
+
+static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+				       struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	u8 ac = txq->ac;
 
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
-	if (list_empty(&txqi->schedule_order) &&
-	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
-		/* If airtime accounting is active, always enqueue STAs at the
-		 * head of the list to ensure that they only get moved to the
-		 * back by the airtime DRR scheduler once they have a negative
-		 * deficit. A station that already has a negative deficit will
-		 * get immediately moved to the back of the list on the next
-		 * call to ieee80211_next_txq().
-		 */
-		if (txqi->txq.sta &&
-		    wiphy_ext_feature_isset(local->hw.wiphy,
-					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
-			list_add(&txqi->schedule_order,
-				 &local->active_txqs[txq->ac]);
-		else
-			list_add_tail(&txqi->schedule_order,
-				      &local->active_txqs[txq->ac]);
+	if (RB_EMPTY_NODE(&txqi->schedule_order))
+		return;
+
+	if (txq->sta) {
+		struct sta_info *sta = container_of(txq->sta,
+						    struct sta_info, sta);
+
+		local->airtime_weight_sum[ac] -= sta->airtime_weight;
 	}
+
+	rb_erase_cached(&txqi->schedule_order,
+			&local->active_txqs[txq->ac]);
+	RB_CLEAR_NODE(&txqi->schedule_order);
 }
-EXPORT_SYMBOL(ieee80211_return_txq);
 
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			    struct ieee80211_txq *txq)
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	ieee80211_return_txq(hw, txq);
+	__ieee80211_unschedule_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
+	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
+		__ieee80211_unschedule_txq(hw, txq);
+}
+EXPORT_SYMBOL(ieee80211_return_txq);
 
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+	struct txq_info *txqi = to_txq_info(txq);
 	struct sta_info *sta;
 	u8 ac = txq->ac;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
-		goto out;
-
-	if (list_empty(&txqi->schedule_order))
-		goto out;
-
-	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
-				 schedule_order) {
-		if (iter == txqi)
-			break;
-
-		if (!iter->txq.sta) {
-			list_move_tail(&iter->schedule_order,
-				       &local->active_txqs[ac]);
-			continue;
-		}
-		sta = container_of(iter->txq.sta, struct sta_info, sta);
-		if (sta->airtime[ac].deficit < 0)
-			sta->airtime[ac].deficit += sta->airtime_weight;
-		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
-	}
+		return true;
 
 	sta = container_of(txqi->txq.sta, struct sta_info, sta);
-	if (sta->airtime[ac].deficit >= 0)
-		goto out;
-
-	sta->airtime[ac].deficit += sta->airtime_weight;
-	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
-
-	return false;
-out:
-	if (!list_empty(&txqi->schedule_order))
-		list_del_init(&txqi->schedule_order);
-
-	return true;
+	return (sta->airtime[ac].v_t <= local->airtime_v_t[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_may_transmit);
 
@@ -3769,7 +3832,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
-	local->schedule_round[ac]++;
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
@@ -3778,6 +3840,7 @@ void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
+	local->schedule_pos[ac] = NULL;
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_end);
-- 
1.9.1


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-16 13:09 ` Yibo Zhao
@ 2019-09-16 13:09   ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Yibo Zhao, Toke Høiland-Jørgensen

In a scenario where txqs are dequeued in a loop, if the first txq in the
rbtree is removed from the rbtree immediately in ieee80211_return_txq(),
the loop in ieee80211_next_txq() will end early because schedule_pos no
longer leads to the second txq in the rbtree. Thus, defer the removal
until right before the end of the schedule round.

Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 include/net/mac80211.h     | 16 ++++++++++--
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/main.c        |  6 +++++
 net/mac80211/tx.c          | 63 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2ed8e..ba5a345 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
 
 #define IEEE80211_MAX_TX_RETRY		31
 
+#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
+
 static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
 					  u8 mcs, u8 nss)
 {
@@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
  * @ac: AC number to return packets from.
  *
  * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
+ * and ieee80211_txq_schedule_end(). If the txq is empty, it will be added
+ * to a remove list and get removed later.
  * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
  * is returned, it should be returned with ieee80211_return_txq() after the
  * driver has finished scheduling it.
@@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to acquire locks for
  *
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Release locks previously acquired by ieee80211_txq_schedule_end(). Check
+ * and remove the empty txq from rb-tree.
  */
 void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	__releases(txq_lock);
@@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock);
 
 /**
+ * ieee80211_txqs_check - Check txqs waiting for removal
+ *
+ * @tmr: pointer as obtained from local
+ *
+ */
+void ieee80211_txqs_check(struct timer_list *tmr);
+
+/**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
  *
  * This function is used to check whether given txq is allowed to transmit by
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a4556f9..49aa143e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -847,6 +847,7 @@ struct txq_info {
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
 	struct rb_node schedule_order;
+	struct list_head candidate;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1145,6 +1146,8 @@ struct ieee80211_local {
 	u64 airtime_v_t[IEEE80211_NUM_ACS];
 	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
 
+	struct list_head remove_list[IEEE80211_NUM_ACS];
+	struct timer_list remove_timer;
 	u16 airtime_flags;
 
 	const struct ieee80211_ops *ops;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e9ffa8e..78fe24a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -667,10 +667,15 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		local->active_txqs[i] = RB_ROOT_CACHED;
+		INIT_LIST_HEAD(&local->remove_list[i]);
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
 	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
 
+	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
+	mod_timer(&local->remove_timer,
+		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
+
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
 
@@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	tasklet_kill(&local->tx_pending_tasklet);
 	tasklet_kill(&local->tasklet);
 
+	del_timer_sync(&local->remove_timer);
 #ifdef CONFIG_INET
 	unregister_inetaddr_notifier(&local->ifa_notifier);
 #endif
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d00baaa..42ca010 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
 	RB_CLEAR_NODE(&txqi->schedule_order);
+	INIT_LIST_HEAD(&txqi->candidate);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 
+	if (!list_empty(&txqi->candidate))
+		list_del_init(&txqi->candidate);
+
 	if (!RB_EMPTY_NODE(&txqi->schedule_order))
 		goto out;
 
@@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 	RB_CLEAR_NODE(&txqi->schedule_order);
 }
 
+void ieee80211_remove_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+		__ieee80211_unschedule_txq(hw, txq);
+		list_del_init(&txqi->candidate);
+	}
+}
+
 void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock)
@@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	__ieee80211_unschedule_txq(hw, txq);
+	ieee80211_remove_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
 
@@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
 	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
-	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
-		__ieee80211_unschedule_txq(hw, txq);
+		!txq_has_queue(&txqi->txq) &&
+		list_empty(&txqi->candidate))
+		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
+
 }
 EXPORT_SYMBOL(ieee80211_return_txq);
 
+void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
+{
+	struct txq_info *iter, *tmp;
+	struct sta_info *sta;
+
+	lockdep_assert_held(&local->active_txq_lock[ac]);
+
+	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
+				 candidate) {
+		sta = container_of(iter->txq.sta, struct sta_info, sta);
+
+		if (txq_has_queue(&iter->txq))
+			list_del_init(&iter->candidate);
+		else
+			ieee80211_remove_txq(&local->hw, &iter->txq);
+	}
+}
+
+void ieee80211_txqs_check(struct timer_list *t)
+{
+	struct ieee80211_local *local = from_timer(local, t, remove_timer);
+	struct txq_info *iter, *tmp;
+	struct sta_info *sta;
+	int ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		__ieee80211_check_txqs(local, ac);
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	mod_timer(&local->remove_timer,
+		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
+}
+
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
@@ -3841,6 +3896,8 @@ void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	local->schedule_pos[ac] = NULL;
+	__ieee80211_check_txqs(local, ac);
+
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_end);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-16 13:09   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: Yibo Zhao, Toke Høiland-Jørgensen, linux-wireless

In a scenario where txqs are dequeued in a loop, if the first txq in the
rbtree is removed from the rbtree immediately in ieee80211_return_txq(),
the loop in ieee80211_next_txq() will end early because schedule_pos no
longer leads to the second txq in the rbtree. Thus, defer the removal
until right before the end of the schedule round.

Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 include/net/mac80211.h     | 16 ++++++++++--
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/main.c        |  6 +++++
 net/mac80211/tx.c          | 63 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2ed8e..ba5a345 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
 
 #define IEEE80211_MAX_TX_RETRY		31
 
+#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
+
 static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
 					  u8 mcs, u8 nss)
 {
@@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
  * @ac: AC number to return packets from.
  *
  * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
+ * and ieee80211_txq_schedule_end(). If the txq is empty, it will be added
+ * to a remove list and get removed later.
  * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
  * is returned, it should be returned with ieee80211_return_txq() after the
  * driver has finished scheduling it.
@@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
  * @hw: pointer as obtained from ieee80211_alloc_hw()
  * @ac: AC number to acquire locks for
  *
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Release locks previously acquired by ieee80211_txq_schedule_end(). Check
+ * and remove the empty txq from rb-tree.
  */
 void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	__releases(txq_lock);
@@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock);
 
 /**
+ * ieee80211_txqs_check - Check txqs waiting for removal
+ *
+ * @tmr: pointer as obtained from local
+ *
+ */
+void ieee80211_txqs_check(struct timer_list *tmr);
+
+/**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
  *
  * This function is used to check whether given txq is allowed to transmit by
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a4556f9..49aa143e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -847,6 +847,7 @@ struct txq_info {
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
 	struct rb_node schedule_order;
+	struct list_head candidate;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1145,6 +1146,8 @@ struct ieee80211_local {
 	u64 airtime_v_t[IEEE80211_NUM_ACS];
 	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
 
+	struct list_head remove_list[IEEE80211_NUM_ACS];
+	struct timer_list remove_timer;
 	u16 airtime_flags;
 
 	const struct ieee80211_ops *ops;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e9ffa8e..78fe24a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -667,10 +667,15 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		local->active_txqs[i] = RB_ROOT_CACHED;
+		INIT_LIST_HEAD(&local->remove_list[i]);
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
 	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
 
+	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
+	mod_timer(&local->remove_timer,
+		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
+
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
 
@@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	tasklet_kill(&local->tx_pending_tasklet);
 	tasklet_kill(&local->tasklet);
 
+	del_timer_sync(&local->remove_timer);
 #ifdef CONFIG_INET
 	unregister_inetaddr_notifier(&local->ifa_notifier);
 #endif
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d00baaa..42ca010 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
 	RB_CLEAR_NODE(&txqi->schedule_order);
+	INIT_LIST_HEAD(&txqi->candidate);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 
 	spin_lock_bh(&local->active_txq_lock[ac]);
 
+	if (!list_empty(&txqi->candidate))
+		list_del_init(&txqi->candidate);
+
 	if (!RB_EMPTY_NODE(&txqi->schedule_order))
 		goto out;
 
@@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 	RB_CLEAR_NODE(&txqi->schedule_order);
 }
 
+void ieee80211_remove_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+		__ieee80211_unschedule_txq(hw, txq);
+		list_del_init(&txqi->candidate);
+	}
+}
+
 void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq)
 	__acquires(txq_lock) __releases(txq_lock)
@@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	spin_lock_bh(&local->active_txq_lock[txq->ac]);
-	__ieee80211_unschedule_txq(hw, txq);
+	ieee80211_remove_txq(hw, txq);
 	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
 }
 
@@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
 	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
-	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
-		__ieee80211_unschedule_txq(hw, txq);
+		!txq_has_queue(&txqi->txq) &&
+		list_empty(&txqi->candidate))
+		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
+
 }
 EXPORT_SYMBOL(ieee80211_return_txq);
 
+void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
+{
+	struct txq_info *iter, *tmp;
+	struct sta_info *sta;
+
+	lockdep_assert_held(&local->active_txq_lock[ac]);
+
+	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
+				 candidate) {
+		sta = container_of(iter->txq.sta, struct sta_info, sta);
+
+		if (txq_has_queue(&iter->txq))
+			list_del_init(&iter->candidate);
+		else
+			ieee80211_remove_txq(&local->hw, &iter->txq);
+	}
+}
+
+void ieee80211_txqs_check(struct timer_list *t)
+{
+	struct ieee80211_local *local = from_timer(local, t, remove_timer);
+	struct txq_info *iter, *tmp;
+	struct sta_info *sta;
+	int ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		__ieee80211_check_txqs(local, ac);
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	mod_timer(&local->remove_timer,
+		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
+}
+
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
@@ -3841,6 +3896,8 @@ void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	local->schedule_pos[ac] = NULL;
+	__ieee80211_check_txqs(local, ac);
+
 	spin_unlock_bh(&local->active_txq_lock[ac]);
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_end);
-- 
1.9.1


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-16 13:09 ` Yibo Zhao
@ 2019-09-16 13:09   ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Yibo Zhao, Toke Høiland-Jørgensen

If a station is ineligible for transmission in ieee80211_txq_may_transmit(),
no packet will be delivered to FW. During tests in push-pull mode with
many clients, after several seconds, not a single station is an eligible
candidate for transmission, since the global virtual time is smaller than
all the stations' virtual airtime. As a consequence, Tx is blocked and
throughput is quite low.

To prevent this situation from occurring in push-pull mode, the new proposal is:

- Increase the airtime grace period a little more to reduce the
  unexpected sync

- If the global virtual time is less than the virtual airtime of every
  station, sync it to the airtime of the first station in the red-black tree

- Round the division result, since updating the global virtual time
  involves a division

Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/mac80211/sta_info.c |  3 ++-
 net/mac80211/sta_info.h |  2 +-
 net/mac80211/tx.c       | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9d01fdd..feac975 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1852,7 +1852,8 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 
 	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
 
-	local->airtime_v_t[ac] += airtime / weight_sum;
+	/* Round the calculation of global vt */
+	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
 	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
 	ieee80211_resort_txq(&local->hw, txq);
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c1cac9..5055f94 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -130,7 +130,7 @@ enum ieee80211_agg_stop_reason {
 /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
 #define AIRTIME_USE_TX		BIT(0)
 #define AIRTIME_USE_RX		BIT(1)
-#define AIRTIME_GRACE 500 /* usec of grace period before reset */
+#define AIRTIME_GRACE 2000 /* usec of grace period before reset */
 
 struct airtime_info {
 	u64 rx_airtime;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 42ca010..60cf569 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3867,15 +3867,29 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *txqi = to_txq_info(txq);
+	struct txq_info *first_txqi, *txqi = to_txq_info(txq);
+	struct rb_node *node = NULL;
 	struct sta_info *sta;
 	u8 ac = txq->ac;
+	first_txqi = NULL;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
 		return true;
 
+	node = rb_first_cached(&local->active_txqs[ac]);
+	if (node) {
+		first_txqi = container_of(node, struct txq_info,
+					  schedule_order);
+		if (first_txqi->txq.sta) {
+			sta = container_of(first_txqi->txq.sta,
+					   struct sta_info, sta);
+			if (local->airtime_v_t[ac] < sta->airtime[ac].v_t)
+				local->airtime_v_t[ac] = sta->airtime[ac].v_t;
+		}
+	}
+
 	sta = container_of(txqi->txq.sta, struct sta_info, sta);
 	return (sta->airtime[ac].v_t <= local->airtime_v_t[ac]);
 }
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-16 13:09   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: Yibo Zhao, Toke Høiland-Jørgensen, linux-wireless

If a station is ineligible for transmission in ieee80211_txq_may_transmit(),
no packet will be delivered to FW. During tests in push-pull mode with
many clients, after several seconds, not a single station is an eligible
candidate for transmission, since the global virtual time is smaller than
all the stations' virtual airtime. As a consequence, Tx is blocked and
throughput is quite low.

To prevent this situation from occurring in push-pull mode, the new proposal is:

- Increase the airtime grace period a little more to reduce the
  unexpected sync

- If the global virtual time is less than the virtual airtime of every
  station, sync it to the airtime of the first station in the red-black tree

- Round the division result, since updating the global virtual time
  involves a division

Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/mac80211/sta_info.c |  3 ++-
 net/mac80211/sta_info.h |  2 +-
 net/mac80211/tx.c       | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 9d01fdd..feac975 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1852,7 +1852,8 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 
 	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
 
-	local->airtime_v_t[ac] += airtime / weight_sum;
+	/* Round the calculation of global vt */
+	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
 	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
 	ieee80211_resort_txq(&local->hw, txq);
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c1cac9..5055f94 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -130,7 +130,7 @@ enum ieee80211_agg_stop_reason {
 /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
 #define AIRTIME_USE_TX		BIT(0)
 #define AIRTIME_USE_RX		BIT(1)
-#define AIRTIME_GRACE 500 /* usec of grace period before reset */
+#define AIRTIME_GRACE 2000 /* usec of grace period before reset */
 
 struct airtime_info {
 	u64 rx_airtime;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 42ca010..60cf569 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3867,15 +3867,29 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *txqi = to_txq_info(txq);
+	struct txq_info *first_txqi, *txqi = to_txq_info(txq);
+	struct rb_node *node = NULL;
 	struct sta_info *sta;
 	u8 ac = txq->ac;
+	first_txqi = NULL;
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
 	if (!txqi->txq.sta)
 		return true;
 
+	node = rb_first_cached(&local->active_txqs[ac]);
+	if (node) {
+		first_txqi = container_of(node, struct txq_info,
+					  schedule_order);
+		if (first_txqi->txq.sta) {
+			sta = container_of(first_txqi->txq.sta,
+					   struct sta_info, sta);
+			if (local->airtime_v_t[ac] < sta->airtime[ac].v_t)
+				local->airtime_v_t[ac] = sta->airtime[ac].v_t;
+		}
+	}
+
 	sta = container_of(txqi->txq.sta, struct sta_info, sta);
 	return (sta->airtime[ac].v_t <= local->airtime_v_t[ac]);
 }
-- 
1.9.1


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-09-16 13:09 ` Yibo Zhao
@ 2019-09-16 13:09   ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: linux-wireless, Yibo Zhao

Global airtime weight sum is updated only when txq is added/removed
from rbtree. If upper layer configures sta weight during high load,
airtime weight sum will not be updated since txq is most likely on the
tree. It could be a little late for upper layer to reconfigure sta weight
when txq is already in the rbtree. And thus, incorrect airtime weight sum
will lead to incorrect global virtual time calculation as well as
overflow of the global airtime weight sum when the txq is removed.

Hence, need to update airtime weight sum upon receiving event for
configuring sta weight once sta's txq is on the rbtree.

Besides, if airtime weight sum of ACs and sta weight is synced under the
same per AC lock protection, there can be a very short window causing
incorrect airtime weight sum calculation as below:

    active_txq_lock_VO                          .
    VO weight sum is syncd			.
    sta airtime weight sum is synced		.
    active_txq_unlock_VO			.
    .						.
    active_txq_lock_VI    			.
    VI weight sum is syncd			.
    sta airtime weight sum		active_txq_lock_BE
    active_txq_unlock_VI	      Remove txq and thus sum
    .				      is calculated with synced
    .				      sta airtime weight
    .					active_txq_unlock_BE

So introduce a per ac synced station airtime weight synced with per
AC synced weight sum together. And the per-AC station airtime weight
is used to calculate weight sum.

Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
---
 net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
 net/mac80211/sta_info.c |  6 ++++--
 net/mac80211/sta_info.h |  3 +++
 net/mac80211/tx.c       |  4 ++--
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d65aa01..4b420bb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	int ret = 0;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 mask, set;
+	u32 mask, set, tid, ac;
+	struct txq_info *txqi;
 
 	sband = ieee80211_get_sband(sdata);
 	if (!sband)
@@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
-	if (params->airtime_weight)
+	if (params->airtime_weight &&
+	    params->airtime_weight != sta->airtime_weight) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			spin_lock_bh(&local->active_txq_lock[ac]);
+			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+				if (!sta->sta.txq[tid] ||
+				    ac != ieee80211_ac_from_tid(tid))
+					continue;
+
+				sta->airtime_weight_synced[ac] =
+							params->airtime_weight;
+
+				txqi = to_txq_info(sta->sta.txq[tid]);
+				if (RB_EMPTY_NODE(&txqi->schedule_order))
+					continue;
+
+				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
+								params->airtime_weight -
+								sta->airtime_weight;
+			}
+			spin_unlock_bh(&local->active_txq_lock[ac]);
+		}
 		sta->airtime_weight = params->airtime_weight;
+	}
 
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index feac975..b00812f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -389,6 +389,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime_weight_synced[i] = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1850,11 +1851,12 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
 
-	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+	weight_sum = local->airtime_weight_sum[ac] ?
+					: sta->airtime_weight_synced[ac];
 
 	/* Round the calculation of global vt */
 	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
-	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight_synced[ac];
 	ieee80211_resort_txq(&local->hw, txq);
 
 	spin_unlock_bh(&local->active_txq_lock[ac]);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5055f94..1298902 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -476,6 +476,8 @@ struct ieee80211_sta_rx_stats {
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @airtime: per-AC struct airtime_info describing airtime statistics for this
  *	station
+ * @airtime_weight_synced: station per-AC airtime weight for sync and
+ *  calculation
  * @airtime_weight: station weight for airtime fairness calculation purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
@@ -602,6 +604,7 @@ struct sta_info {
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	struct airtime_info airtime[IEEE80211_NUM_ACS];
+	u16 airtime_weight_synced[IEEE80211_NUM_ACS];
 	u16 airtime_weight;
 
 	/*
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 60cf569..3592d49 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3735,7 +3735,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		local->airtime_weight_sum[ac] += sta->airtime_weight_synced[ac];
 		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
 			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
 						   sta->airtime[ac].v_t);
@@ -3779,7 +3779,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] -= sta->airtime_weight;
+		local->airtime_weight_sum[ac] -= sta->airtime_weight_synced[ac];
 	}
 
 	rb_erase_cached(&txqi->schedule_order,
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-09-16 13:09   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-16 13:09 UTC (permalink / raw)
  To: ath10k; +Cc: Yibo Zhao, linux-wireless

Global airtime weight sum is updated only when txq is added/removed
from rbtree. If upper layer configures sta weight during high load,
airtime weight sum will not be updated since txq is most likely on the
tree. It could be a little late for upper layer to reconfigure sta weight
when txq is already in the rbtree. And thus, incorrect airtime weight sum
will lead to incorrect global virtual time calculation as well as
overflow of the global airtime weight sum when the txq is removed.

Hence, need to update airtime weight sum upon receiving event for
configuring sta weight once sta's txq is on the rbtree.

Besides, if airtime weight sum of ACs and sta weight is synced under the
same per AC lock protection, there can be a very short window causing
incorrect airtime weight sum calculation as below:

    active_txq_lock_VO                          .
    VO weight sum is syncd			.
    sta airtime weight sum is synced		.
    active_txq_unlock_VO			.
    .						.
    active_txq_lock_VI    			.
    VI weight sum is syncd			.
    sta airtime weight sum		active_txq_lock_BE
    active_txq_unlock_VI	      Remove txq and thus sum
    .				      is calculated with synced
    .				      sta airtime weight
    .					active_txq_unlock_BE

So introduce a per ac synced station airtime weight synced with per
AC synced weight sum together. And the per-AC station airtime weight
is used to calculate weight sum.

Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
---
 net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
 net/mac80211/sta_info.c |  6 ++++--
 net/mac80211/sta_info.h |  3 +++
 net/mac80211/tx.c       |  4 ++--
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d65aa01..4b420bb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	int ret = 0;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 mask, set;
+	u32 mask, set, tid, ac;
+	struct txq_info *txqi;
 
 	sband = ieee80211_get_sband(sdata);
 	if (!sband)
@@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
-	if (params->airtime_weight)
+	if (params->airtime_weight &&
+	    params->airtime_weight != sta->airtime_weight) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			spin_lock_bh(&local->active_txq_lock[ac]);
+			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+				if (!sta->sta.txq[tid] ||
+				    ac != ieee80211_ac_from_tid(tid))
+					continue;
+
+				sta->airtime_weight_synced[ac] =
+							params->airtime_weight;
+
+				txqi = to_txq_info(sta->sta.txq[tid]);
+				if (RB_EMPTY_NODE(&txqi->schedule_order))
+					continue;
+
+				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
+								params->airtime_weight -
+								sta->airtime_weight;
+			}
+			spin_unlock_bh(&local->active_txq_lock[ac]);
+		}
 		sta->airtime_weight = params->airtime_weight;
+	}
 
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index feac975..b00812f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -389,6 +389,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime_weight_synced[i] = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1850,11 +1851,12 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
 
-	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+	weight_sum = local->airtime_weight_sum[ac] ?
+					: sta->airtime_weight_synced[ac];
 
 	/* Round the calculation of global vt */
 	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
-	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight_synced[ac];
 	ieee80211_resort_txq(&local->hw, txq);
 
 	spin_unlock_bh(&local->active_txq_lock[ac]);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5055f94..1298902 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -476,6 +476,8 @@ struct ieee80211_sta_rx_stats {
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @airtime: per-AC struct airtime_info describing airtime statistics for this
  *	station
+ * @airtime_weight_synced: station per-AC airtime weight for sync and
+ *  calculation
  * @airtime_weight: station weight for airtime fairness calculation purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
@@ -602,6 +604,7 @@ struct sta_info {
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	struct airtime_info airtime[IEEE80211_NUM_ACS];
+	u16 airtime_weight_synced[IEEE80211_NUM_ACS];
 	u16 airtime_weight;
 
 	/*
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 60cf569..3592d49 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3735,7 +3735,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		local->airtime_weight_sum[ac] += sta->airtime_weight_synced[ac];
 		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
 			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
 						   sta->airtime[ac].v_t);
@@ -3779,7 +3779,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] -= sta->airtime_weight;
+		local->airtime_weight_sum[ac] -= sta->airtime_weight_synced[ac];
 	}
 
 	rb_erase_cached(&txqi->schedule_order,
-- 
1.9.1


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
  2019-09-16 13:09 ` Yibo Zhao
@ 2019-09-16 14:51   ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-16 14:51 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> This switches the airtime scheduler in mac80211 to use a virtual time-based
> scheduler instead of the round-robin scheduler used before. This has a
> couple of advantages:

Thank you for keeping at this! I'll take a look at the series in detail
tomorrow.

While you're testing things related to this, I've also prototyped a port
of the "airtime queue limit" feature from chromeos into mainline. It's
currently in my tree here:
https://git.kernel.org/pub/scm/linux/kernel/git/toke/linux.git/log/?h=mac80211-aql-01

If you have time to test it at some point, that would be awesome. I'm
planning to submit it as an RFC, but it needs a bit more work first.
Also, it's completely untested, but it does compile :)

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
@ 2019-09-16 14:51   ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-16 14:51 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> This switches the airtime scheduler in mac80211 to use a virtual time-based
> scheduler instead of the round-robin scheduler used before. This has a
> couple of advantages:

Thank you for keeping at this! I'll take a look at the series in detail
tomorrow.

While you're testing things related to this, I've also prototyped a port
of the "airtime queue limit" feature from chromeos into mainline. It's
currently in my tree here:
https://git.kernel.org/pub/scm/linux/kernel/git/toke/linux.git/log/?h=mac80211-aql-01

If you have time to test it at some point, that would be awesome. I'm
planning to submit it as an RFC, but it needs a bit more work first.
Also, it's completely untested, but it does compile :)

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-16 13:09   ` Yibo Zhao
@ 2019-09-16 15:27     ` Johannes Berg
  -1 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-09-16 15:27 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless, Toke Høiland-Jørgensen

Without really looking at the code - 

> If station is ineligible for transmission in ieee80211_txq_may_transmit(),
> no packet will be delivered to FW. During the tests in push-pull mode with
> many clients, after several seconds, not a single station is an eligible
> candidate for transmission since global time is smaller than all the
> station's virtual airtime. As a consequence, the Tx has been blocked and
> throughput is quite low.

You should rewrite this to be, erm, a bit more understandable in
mac80211 context. I assume you're speaking (mostly?) about ath10k, but I
have very little context there. "push pull mode"? "firmware"? These
things are not something mac80211 knows about.

> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>

That also seems wrong, should be Toke I guess, unless you intended for a
From: Toke to be present?

johannes



^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-16 15:27     ` Johannes Berg
  0 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-09-16 15:27 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: Toke Høiland-Jørgensen, linux-wireless

Without really looking at the code - 

> If station is ineligible for transmission in ieee80211_txq_may_transmit(),
> no packet will be delivered to FW. During the tests in push-pull mode with
> many clients, after several seconds, not a single station is an eligible
> candidate for transmission since global time is smaller than all the
> station's virtual airtime. As a consequence, the Tx has been blocked and
> throughput is quite low.

You should rewrite this to be, erm, a bit more understandable in
mac80211 context. I assume you're speaking (mostly?) about ath10k, but I
have very little context there. "push pull mode"? "firmware"? These
things are not something mac80211 knows about.

> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>

That also seems wrong, should be Toke I guess, unless you intended for a
From: Toke to be present?

johannes



_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-16 15:27     ` Johannes Berg
@ 2019-09-17  6:36       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-17  6:36 UTC (permalink / raw)
  To: Johannes Berg
  Cc: ath10k, linux-wireless, Toke Høiland-Jørgensen,
	linux-wireless-owner

On 2019-09-16 23:27, Johannes Berg wrote:
> Without really looking at the code -
> 
>> If station is ineligible for transmission in 
>> ieee80211_txq_may_transmit(),
>> no packet will be delivered to FW. During the tests in push-pull mode 
>> with
>> many clients, after several seconds, not a single station is an 
>> eligible
>> candidate for transmission since global time is smaller than all the
>> station's virtual airtime. As a consequence, the Tx has been blocked 
>> and
>> throughput is quite low.
> 
> You should rewrite this to be, erm, a bit more understandable in
> mac80211 context. I assume you're speaking (mostly?) about ath10k, but 
> I
> have very little context there. "push pull mode"? "firmware"? These
> things are not something mac80211 knows about.
Hi Johannes,

Thanks for your kind reminder. Will rewrite the commit log.

> 
>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
> 
> That also seems wrong, should be Toke I guess, unless you intended for 
> a
> From: Toke to be present?
Do you mean it should be something like:

Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

Am I understanding right?
> 
> johannes

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-17  6:36       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-17  6:36 UTC (permalink / raw)
  To: Johannes Berg
  Cc: linux-wireless-owner, Toke Høiland-Jørgensen,
	linux-wireless, ath10k

On 2019-09-16 23:27, Johannes Berg wrote:
> Without really looking at the code -
> 
>> If station is ineligible for transmission in 
>> ieee80211_txq_may_transmit(),
>> no packet will be delivered to FW. During the tests in push-pull mode 
>> with
>> many clients, after several seconds, not a single station is an 
>> eligible
>> candidate for transmission since global time is smaller than all the
>> station's virtual airtime. As a consequence, the Tx has been blocked 
>> and
>> throughput is quite low.
> 
> You should rewrite this to be, erm, a bit more understandable in
> mac80211 context. I assume you're speaking (mostly?) about ath10k, but 
> I
> have very little context there. "push pull mode"? "firmware"? These
> things are not something mac80211 knows about.
Hi Johannes,

Thanks for your kind reminder. Will rewrite the commit log.

> 
>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
> 
> That also seems wrong, should be Toke I guess, unless you intended for 
> a
> From: Toke to be present?
Do you mean it should be something like:

Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

Am I understanding right?
> 
> johannes

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-17  6:36       ` Yibo Zhao
@ 2019-09-17  6:55         ` Johannes Berg
  -1 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-09-17  6:55 UTC (permalink / raw)
  To: Yibo Zhao
  Cc: ath10k, linux-wireless, Toke Høiland-Jørgensen,
	linux-wireless-owner

On Tue, 2019-09-17 at 14:36 +0800, Yibo Zhao wrote:
> 
> Do you mean it should be something like:
> 
> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

Yes, I think you mean the right thing. For the record, it seems to me it
should be

From: A <...>

[...]

Co-developed-by: B <...>
Signed-off-by: B <...>
Signed-off-by: A <...>

or so.

IOW, I think having the same "From:" (which gets preserved in git as
"Author") and "Co-developed-by" makes no sense?

Your "From" line was implied, but I suppose you did mean that From would
be yourself (as it was in the patch) and then the above seems right.

Or you can add a "From: Toke ..." to your patch message and leave the
"Co-developed-by: yourself" I suppose, the difference is in how git will
record it.

johannes


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-17  6:55         ` Johannes Berg
  0 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-09-17  6:55 UTC (permalink / raw)
  To: Yibo Zhao
  Cc: linux-wireless-owner, Toke Høiland-Jørgensen,
	linux-wireless, ath10k

On Tue, 2019-09-17 at 14:36 +0800, Yibo Zhao wrote:
> 
> Do you mean it should be something like:
> 
> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

Yes, I think you mean the right thing. For the record, it seems to me it
should be

From: A <...>

[...]

Co-developed-by: B <...>
Signed-off-by: B <...>
Signed-off-by: A <...>

or so.

IOW, I think having the same "From:" (which gets preserved in git as
"Author") and "Co-developed-by" makes no sense?

Your "From" line was implied, but I suppose you did mean that From would
be yourself (as it was in the patch) and then the above seems right.

Or you can add a "From: Toke ..." to your patch message and leave the
"Co-developed-by: yourself" I suppose, the difference is in how git will
record it.

johannes


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-16 13:09   ` Yibo Zhao
@ 2019-09-17 21:10     ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:10 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless, Yibo Zhao

Yibo Zhao <yiboz@codeaurora.org> writes:

> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
> removed from rbtree immediately in the ieee80211_return_txq(), the
> loop will break soon in the ieee80211_next_txq() due to schedule_pos
> not leading to the second txq in the rbtree. Thus, deferring the
> removal right before the end of this schedule round.
>
> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

I didn't write this patch, so please don't use my sign-off. I'll add
ack or review tags as appropriate in reply; but a few comments first:

> ---
>  include/net/mac80211.h     | 16 ++++++++++--
>  net/mac80211/ieee80211_i.h |  3 +++
>  net/mac80211/main.c        |  6 +++++
>  net/mac80211/tx.c          | 63 +++++++++++++++++++++++++++++++++++++++++++---
>  4 files changed, 83 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
> index ac2ed8e..ba5a345 100644
> --- a/include/net/mac80211.h
> +++ b/include/net/mac80211.h
> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>  
>  #define IEEE80211_MAX_TX_RETRY		31
>  
> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
> +
>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
>  					  u8 mcs, u8 nss)
>  {
> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
>   * @ac: AC number to return packets from.
>   *
>   * Should only be called between calls to ieee80211_txq_schedule_start()
> - * and ieee80211_txq_schedule_end().
> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be added
> + * to a remove list and get removed later.
>   * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
>   * is returned, it should be returned with ieee80211_return_txq() after the
>   * driver has finished scheduling it.
> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>   * @ac: AC number to acquire locks for
>   *
> - * Release locks previously acquired by ieee80211_txq_schedule_end().
> + * Release locks previously acquired by ieee80211_txq_schedule_end(). Check
> + * and remove the empty txq from rb-tree.
>   */
>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>  	__releases(txq_lock);
> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
>  	__acquires(txq_lock) __releases(txq_lock);
>  
>  /**
> + * ieee80211_txqs_check - Check txqs waiting for removal
> + *
> + * @tmr: pointer as obtained from local
> + *
> + */
> +void ieee80211_txqs_check(struct timer_list *tmr);
> +
> +/**
>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
>   *
>   * This function is used to check whether given txq is allowed to transmit by
> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
> index a4556f9..49aa143e 100644
> --- a/net/mac80211/ieee80211_i.h
> +++ b/net/mac80211/ieee80211_i.h
> @@ -847,6 +847,7 @@ struct txq_info {
>  	struct codel_stats cstats;
>  	struct sk_buff_head frags;
>  	struct rb_node schedule_order;
> +	struct list_head candidate;
>  	unsigned long flags;
>  
>  	/* keep last! */
> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>  
> +	struct list_head remove_list[IEEE80211_NUM_ACS];
> +	struct timer_list remove_timer;
>  	u16 airtime_flags;
>  
>  	const struct ieee80211_ops *ops;
> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
> index e9ffa8e..78fe24a 100644
> --- a/net/mac80211/main.c
> +++ b/net/mac80211/main.c
> @@ -667,10 +667,15 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
>  
>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>  		local->active_txqs[i] = RB_ROOT_CACHED;
> +		INIT_LIST_HEAD(&local->remove_list[i]);
>  		spin_lock_init(&local->active_txq_lock[i]);
>  	}
>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>  
> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
> +	mod_timer(&local->remove_timer,
> +		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
> +
>  	INIT_LIST_HEAD(&local->chanctx_list);
>  	mutex_init(&local->chanctx_mtx);
>  
> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
>  	tasklet_kill(&local->tx_pending_tasklet);
>  	tasklet_kill(&local->tasklet);
>  
> +	del_timer_sync(&local->remove_timer);
>  #ifdef CONFIG_INET
>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>  #endif
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index d00baaa..42ca010 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
>  	codel_stats_init(&txqi->cstats);
>  	__skb_queue_head_init(&txqi->frags);
>  	RB_CLEAR_NODE(&txqi->schedule_order);
> +	INIT_LIST_HEAD(&txqi->candidate);
>  
>  	txqi->txq.vif = &sdata->vif;
>  
> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
>  
>  	spin_lock_bh(&local->active_txq_lock[ac]);
>  
> +	if (!list_empty(&txqi->candidate))
> +		list_del_init(&txqi->candidate);
> +
>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>  		goto out;
>  
> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  	RB_CLEAR_NODE(&txqi->schedule_order);
>  }
>  
> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
> +			  struct ieee80211_txq *txq)
> +{
> +	struct ieee80211_local *local = hw_to_local(hw);
> +	struct txq_info *txqi = to_txq_info(txq);
> +
> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
> +
> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
> +		__ieee80211_unschedule_txq(hw, txq);
> +		list_del_init(&txqi->candidate);
> +	}
> +}
> +
>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  			      struct ieee80211_txq *txq)
>  	__acquires(txq_lock) __releases(txq_lock)
> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  	struct ieee80211_local *local = hw_to_local(hw);
>  
>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
> -	__ieee80211_unschedule_txq(hw, txq);
> +	ieee80211_remove_txq(hw, txq);
>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>  }
>  
> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>  
>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
> -		__ieee80211_unschedule_txq(hw, txq);
> +		!txq_has_queue(&txqi->txq) &&
> +		list_empty(&txqi->candidate))
> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
> +
>  }
>  EXPORT_SYMBOL(ieee80211_return_txq);
>  
> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
> +{
> +	struct txq_info *iter, *tmp;
> +	struct sta_info *sta;
> +
> +	lockdep_assert_held(&local->active_txq_lock[ac]);
> +
> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
> +				 candidate) {
> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
> +
> +		if (txq_has_queue(&iter->txq))
> +			list_del_init(&iter->candidate);
> +		else
> +			ieee80211_remove_txq(&local->hw, &iter->txq);
> +	}
> +}
> +
> +void ieee80211_txqs_check(struct timer_list *t)
> +{
> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
> +	struct txq_info *iter, *tmp;
> +	struct sta_info *sta;
> +	int ac;
> +
> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
> +		spin_lock_bh(&local->active_txq_lock[ac]);
> +		__ieee80211_check_txqs(local, ac);
> +		spin_unlock_bh(&local->active_txq_lock[ac]);
> +	}
> +
> +	mod_timer(&local->remove_timer,
> +		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
> +}

I'll ask the same as I did last time (where you told me to hold off
until this round):

Why do you need the timer and the periodic check? If TXQs are added to
the remove list during the scheduling run, and __ieee80211_check_txqs()
is run from schedule_end(), isn't that sufficient to clear the list?

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-17 21:10     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:10 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
> removed from rbtree immediately in the ieee80211_return_txq(), the
> loop will break soon in the ieee80211_next_txq() due to schedule_pos
> not leading to the second txq in the rbtree. Thus, defer the
> removal until right before the end of this schedule round.
>
> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>

I didn't write this patch, so please don't use my sign-off. I'll add
ack or review tags as appropriate in reply; but a few comments first:

> ---
>  include/net/mac80211.h     | 16 ++++++++++--
>  net/mac80211/ieee80211_i.h |  3 +++
>  net/mac80211/main.c        |  6 +++++
>  net/mac80211/tx.c          | 63 +++++++++++++++++++++++++++++++++++++++++++---
>  4 files changed, 83 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
> index ac2ed8e..ba5a345 100644
> --- a/include/net/mac80211.h
> +++ b/include/net/mac80211.h
> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>  
>  #define IEEE80211_MAX_TX_RETRY		31
>  
> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
> +
>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
>  					  u8 mcs, u8 nss)
>  {
> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
>   * @ac: AC number to return packets from.
>   *
>   * Should only be called between calls to ieee80211_txq_schedule_start()
> - * and ieee80211_txq_schedule_end().
> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be added
> + * to a remove list and get removed later.
>   * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
>   * is returned, it should be returned with ieee80211_return_txq() after the
>   * driver has finished scheduling it.
> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>   * @ac: AC number to acquire locks for
>   *
> - * Release locks previously acquired by ieee80211_txq_schedule_end().
> + * Release locks previously acquired by ieee80211_txq_schedule_start().
> + * Check and remove the empty txq from rb-tree.
>   */
>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>  	__releases(txq_lock);
> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
>  	__acquires(txq_lock) __releases(txq_lock);
>  
>  /**
> + * ieee80211_txqs_check - Check txqs waiting for removal
> + *
> + * @tmr: pointer as obtained from local
> + *
> + */
> +void ieee80211_txqs_check(struct timer_list *tmr);
> +
> +/**
>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
>   *
>   * This function is used to check whether given txq is allowed to transmit by
> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
> index a4556f9..49aa143e 100644
> --- a/net/mac80211/ieee80211_i.h
> +++ b/net/mac80211/ieee80211_i.h
> @@ -847,6 +847,7 @@ struct txq_info {
>  	struct codel_stats cstats;
>  	struct sk_buff_head frags;
>  	struct rb_node schedule_order;
> +	struct list_head candidate;
>  	unsigned long flags;
>  
>  	/* keep last! */
> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>  
> +	struct list_head remove_list[IEEE80211_NUM_ACS];
> +	struct timer_list remove_timer;
>  	u16 airtime_flags;
>  
>  	const struct ieee80211_ops *ops;
> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
> index e9ffa8e..78fe24a 100644
> --- a/net/mac80211/main.c
> +++ b/net/mac80211/main.c
> @@ -667,10 +667,15 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
>  
>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>  		local->active_txqs[i] = RB_ROOT_CACHED;
> +		INIT_LIST_HEAD(&local->remove_list[i]);
>  		spin_lock_init(&local->active_txq_lock[i]);
>  	}
>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>  
> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
> +	mod_timer(&local->remove_timer,
> +		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
> +
>  	INIT_LIST_HEAD(&local->chanctx_list);
>  	mutex_init(&local->chanctx_mtx);
>  
> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
>  	tasklet_kill(&local->tx_pending_tasklet);
>  	tasklet_kill(&local->tasklet);
>  
> +	del_timer_sync(&local->remove_timer);
>  #ifdef CONFIG_INET
>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>  #endif
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index d00baaa..42ca010 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
>  	codel_stats_init(&txqi->cstats);
>  	__skb_queue_head_init(&txqi->frags);
>  	RB_CLEAR_NODE(&txqi->schedule_order);
> +	INIT_LIST_HEAD(&txqi->candidate);
>  
>  	txqi->txq.vif = &sdata->vif;
>  
> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
>  
>  	spin_lock_bh(&local->active_txq_lock[ac]);
>  
> +	if (!list_empty(&txqi->candidate))
> +		list_del_init(&txqi->candidate);
> +
>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>  		goto out;
>  
> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  	RB_CLEAR_NODE(&txqi->schedule_order);
>  }
>  
> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
> +			  struct ieee80211_txq *txq)
> +{
> +	struct ieee80211_local *local = hw_to_local(hw);
> +	struct txq_info *txqi = to_txq_info(txq);
> +
> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
> +
> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
> +		__ieee80211_unschedule_txq(hw, txq);
> +		list_del_init(&txqi->candidate);
> +	}
> +}
> +
>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  			      struct ieee80211_txq *txq)
>  	__acquires(txq_lock) __releases(txq_lock)
> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>  	struct ieee80211_local *local = hw_to_local(hw);
>  
>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
> -	__ieee80211_unschedule_txq(hw, txq);
> +	ieee80211_remove_txq(hw, txq);
>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>  }
>  
> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>  
>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
> -		__ieee80211_unschedule_txq(hw, txq);
> +		!txq_has_queue(&txqi->txq) &&
> +		list_empty(&txqi->candidate))
> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
> +
>  }
>  EXPORT_SYMBOL(ieee80211_return_txq);
>  
> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
> +{
> +	struct txq_info *iter, *tmp;
> +	struct sta_info *sta;
> +
> +	lockdep_assert_held(&local->active_txq_lock[ac]);
> +
> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
> +				 candidate) {
> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
> +
> +		if (txq_has_queue(&iter->txq))
> +			list_del_init(&iter->candidate);
> +		else
> +			ieee80211_remove_txq(&local->hw, &iter->txq);
> +	}
> +}
> +
> +void ieee80211_txqs_check(struct timer_list *t)
> +{
> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
> +	struct txq_info *iter, *tmp;
> +	struct sta_info *sta;
> +	int ac;
> +
> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
> +		spin_lock_bh(&local->active_txq_lock[ac]);
> +		__ieee80211_check_txqs(local, ac);
> +		spin_unlock_bh(&local->active_txq_lock[ac]);
> +	}
> +
> +	mod_timer(&local->remove_timer,
> +		  jiffies + msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
> +}

I'll ask the same as I did last time (where you told me to hold off
until this round):

Why do you need the timer and the periodic check? If TXQs are added to
the remove list during the scheduling run, and __ieee80211_check_txqs()
is run from schedule_end(), isn't that sufficient to clear the list?

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-17  6:36       ` Yibo Zhao
@ 2019-09-17 21:12         ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:12 UTC (permalink / raw)
  To: Yibo Zhao, Johannes Berg; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-16 23:27, Johannes Berg wrote:
>> Without really looking at the code -
>> 
>>> If station is ineligible for transmission in 
>>> ieee80211_txq_may_transmit(),
>>> no packet will be delivered to FW. During the tests in push-pull mode 
>>> with
>>> many clients, after several seconds, not a single station is an 
>>> eligible
>>> candidate for transmission since global time is smaller than all the
>>> station's virtual airtime. As a consequence, the Tx has been blocked 
>>> and
>>> throughput is quite low.
>> 
>> You should rewrite this to be, erm, a bit more understandable in
>> mac80211 context. I assume you're speaking (mostly?) about ath10k, but 
>> I
>> have very little context there. "push pull mode"? "firmware"? These
>> things are not something mac80211 knows about.
> Hi Johannes,
>
> Thanks for your kindly reminder. Will rewrite the commit log.
>
>> 
>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>> 
>> That also seems wrong, should be Toke I guess, unless you intended for 
>> a
>> From: Toke to be present?
> Do you mean it should be something like:
>
> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>
> Am I understanding right?

I think the right thing here, as with the previous patch, is to just
drop my sign-off; you're writing this patch, and I'll add ack/reviews as
appropriate. And in that case, well, no need to have co-developed-by
yourself when your name is on the patch as author :)

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-17 21:12         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:12 UTC (permalink / raw)
  To: Yibo Zhao, Johannes Berg; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-16 23:27, Johannes Berg wrote:
>> Without really looking at the code -
>> 
>>> If station is ineligible for transmission in 
>>> ieee80211_txq_may_transmit(),
>>> no packet will be delivered to FW. During the tests in push-pull mode 
>>> with
>>> many clients, after several seconds, not a single station is an 
>>> eligible
>>> candidate for transmission since global time is smaller than all the
>>> station's virtual airtime. As a consequence, the Tx has been blocked 
>>> and
>>> throughput is quite low.
>> 
>> You should rewrite this to be, erm, a bit more understandable in
>> mac80211 context. I assume you're speaking (mostly?) about ath10k, but 
>> I
>> have very little context there. "push pull mode"? "firmware"? These
>> things are not something mac80211 knows about.
> Hi Johannes,
>
> Thanks for your kindly reminder. Will rewrite the commit log.
>
>> 
>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>> 
>> That also seems wrong, should be Toke I guess, unless you intended for 
>> a
>> From: Toke to be present?
> Do you mean it should be something like:
>
> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>
> Am I understanding right?

I think the right thing here, as with the previous patch, is to just
drop my sign-off; you're writing this patch, and I'll add ack/reviews as
appropriate. And in that case, well, no need to have co-developed-by
yourself when your name is on the patch as author :)

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-09-16 13:09   ` Yibo Zhao
@ 2019-09-17 21:24     ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:24 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless, Yibo Zhao

Yibo Zhao <yiboz@codeaurora.org> writes:

> Global airtime weight sum is updated only when txq is added/removed
> from rbtree. If upper layer configures sta weight during high load,
> airtime weight sum will not be updated since txq is most likely on the
> tree. It could be a little late for upper layer to reconfigure sta weight
> when txq is already in the rbtree. And thus, incorrect airtime weight sum
> will lead to incorrect global virtual time calculation as well as
> overflow of the global airtime weight sum when the txq is removed.
>
> Hence, need to update airtime weight sum upon receiving event for
> configuring sta weight once sta's txq is on the rbtree.
>
> Besides, if airtime weight sum of ACs and sta weight is synced under the
> same per AC lock protection, there can be a very short window causing
> incorrect airtime weight sum calculation as below:
>
>     active_txq_lock_VO                          .
>     VO weight sum is syncd			.
>     sta airtime weight sum is synced		.
>     active_txq_unlock_VO			.
>     .						.
>     active_txq_lock_VI    			.
>     VI weight sum is syncd			.
>     sta airtime weight sum		active_txq_lock_BE
>     active_txq_unlock_VI	      Remove txq and thus sum
>     .				      is calculated with synced
>     .				      sta airtime weight
>     .					active_txq_unlock_BE
>
> So introduce a per-AC station airtime weight that is synced together
> with the per-AC weight sum under the same per-AC lock, and use this
> per-AC station airtime weight to calculate the weight sum.
>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> ---
>  net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
>  net/mac80211/sta_info.c |  6 ++++--
>  net/mac80211/sta_info.h |  3 +++
>  net/mac80211/tx.c       |  4 ++--
>  4 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
> index d65aa01..4b420bb 100644
> --- a/net/mac80211/cfg.c
> +++ b/net/mac80211/cfg.c
> @@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
>  	int ret = 0;
>  	struct ieee80211_supported_band *sband;
>  	struct ieee80211_sub_if_data *sdata = sta->sdata;
> -	u32 mask, set;
> +	u32 mask, set, tid, ac;
> +	struct txq_info *txqi;
>  
>  	sband = ieee80211_get_sband(sdata);
>  	if (!sband)
> @@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
>  	if (ieee80211_vif_is_mesh(&sdata->vif))
>  		sta_apply_mesh_params(local, sta, params);
>  
> -	if (params->airtime_weight)
> +	if (params->airtime_weight &&
> +	    params->airtime_weight != sta->airtime_weight) {
> +		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
> +			spin_lock_bh(&local->active_txq_lock[ac]);
> +			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
> +				if (!sta->sta.txq[tid] ||
> +				    ac != ieee80211_ac_from_tid(tid))
> +					continue;
> +
> +				sta->airtime_weight_synced[ac] =
> +							params->airtime_weight;
> +
> +				txqi = to_txq_info(sta->sta.txq[tid]);
> +				if (RB_EMPTY_NODE(&txqi->schedule_order))
> +					continue;
> +
> +				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
> +								params->airtime_weight -
> +								sta->airtime_weight;
> +			}
> +			spin_unlock_bh(&local->active_txq_lock[ac]);
> +		}
>  		sta->airtime_weight = params->airtime_weight;

With this, airtime_weight is basically only used to pass the value to
and from userspace, right? I.e., after the above loop has run, it will
match the contents of airtime_weight_synced; so why not just turn
airtime_weight into a per-ac array? You could just use airtime_weight[0]
as the value to return to userspace...

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-09-17 21:24     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:24 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> Global airtime weight sum is updated only when txq is added/removed
> from rbtree. If upper layer configures sta weight during high load,
> airtime weight sum will not be updated since txq is most likely on the
> tree. It could be a little late for upper layer to reconfigure sta weight
> when txq is already in the rbtree. And thus, incorrect airtime weight sum
> will lead to incorrect global virtual time calculation as well as
> overflow of the global airtime weight sum when the txq is removed.
>
> Hence, need to update airtime weight sum upon receiving event for
> configuring sta weight once sta's txq is on the rbtree.
>
> Besides, if airtime weight sum of ACs and sta weight is synced under the
> same per AC lock protection, there can be a very short window causing
> incorrect airtime weight sum calculation as below:
>
>     active_txq_lock_VO                          .
>     VO weight sum is syncd			.
>     sta airtime weight sum is synced		.
>     active_txq_unlock_VO			.
>     .						.
>     active_txq_lock_VI    			.
>     VI weight sum is syncd			.
>     sta airtime weight sum		active_txq_lock_BE
>     active_txq_unlock_VI	      Remove txq and thus sum
>     .				      is calculated with synced
>     .				      sta airtime weight
>     .					active_txq_unlock_BE
>
> So introduce a per-AC station airtime weight that is synced together
> with the per-AC weight sum under the same per-AC lock, and use this
> per-AC station airtime weight to calculate the weight sum.
>
> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
> ---
>  net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
>  net/mac80211/sta_info.c |  6 ++++--
>  net/mac80211/sta_info.h |  3 +++
>  net/mac80211/tx.c       |  4 ++--
>  4 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
> index d65aa01..4b420bb 100644
> --- a/net/mac80211/cfg.c
> +++ b/net/mac80211/cfg.c
> @@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
>  	int ret = 0;
>  	struct ieee80211_supported_band *sband;
>  	struct ieee80211_sub_if_data *sdata = sta->sdata;
> -	u32 mask, set;
> +	u32 mask, set, tid, ac;
> +	struct txq_info *txqi;
>  
>  	sband = ieee80211_get_sband(sdata);
>  	if (!sband)
> @@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
>  	if (ieee80211_vif_is_mesh(&sdata->vif))
>  		sta_apply_mesh_params(local, sta, params);
>  
> -	if (params->airtime_weight)
> +	if (params->airtime_weight &&
> +	    params->airtime_weight != sta->airtime_weight) {
> +		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
> +			spin_lock_bh(&local->active_txq_lock[ac]);
> +			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
> +				if (!sta->sta.txq[tid] ||
> +				    ac != ieee80211_ac_from_tid(tid))
> +					continue;
> +
> +				sta->airtime_weight_synced[ac] =
> +							params->airtime_weight;
> +
> +				txqi = to_txq_info(sta->sta.txq[tid]);
> +				if (RB_EMPTY_NODE(&txqi->schedule_order))
> +					continue;
> +
> +				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
> +								params->airtime_weight -
> +								sta->airtime_weight;
> +			}
> +			spin_unlock_bh(&local->active_txq_lock[ac]);
> +		}
>  		sta->airtime_weight = params->airtime_weight;

With this, airtime_weight is basically only used to pass the value to
and from userspace, right? I.e., after the above loop has run, it will
match the contents of airtime_weight_synced; so why not just turn
airtime_weight into a per-ac array? You could just use airtime_weight[0]
as the value to return to userspace...

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
  2019-09-16 13:09 ` Yibo Zhao
@ 2019-09-17 21:31   ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:31 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> This switches the airtime scheduler in mac80211 to use a virtual time-based
> scheduler instead of the round-robin scheduler used before. This has a
> couple of advantages:
>
> - No need to sync up the round-robin scheduler in firmware/hardware with
>   the round-robin airtime scheduler.
>
> - If several stations are eligible for transmission we can schedule both of
>   them; no need to hard-block the scheduling rotation until the head of the
>   queue has used up its quantum.
>
> - The check of whether a station is eligible for transmission becomes
>   simpler (in ieee80211_txq_may_transmit()).
>
> The drawback is that scheduling becomes slightly more expensive, as we need
> to maintain an rbtree of TXQs sorted by virtual time. This means that
> ieee80211_register_airtime() becomes O(logN) in the number of currently
> scheduled TXQs. However, hopefully this number rarely grows too big (it's
> only TXQs currently backlogged, not all associated stations), so it
> shouldn't be too big of an issue.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>

I'll note that this patch still has the two issues that Felix pointed
out when I posted the RFC version. Namely:

- The use of divisions in the fast path. I guess I need to go write some
  reciprocal-calculation code, since that is also an issue with the AQL
  patches I linked to before.

- The fact that we don't count the airtime usage of multicast traffic,
  which with this series means that the vif TXQ will get priority over
  the others. I think we agreed to fix this by just adding an airtime
  v_t to the vif as well and use that for scheduling the TXQ. Does
  ath10k report airtime usage for multicast as well, or only for
  stations?


-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
@ 2019-09-17 21:31   ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-17 21:31 UTC (permalink / raw)
  To: Yibo Zhao, ath10k; +Cc: linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
>
> This switches the airtime scheduler in mac80211 to use a virtual time-based
> scheduler instead of the round-robin scheduler used before. This has a
> couple of advantages:
>
> - No need to sync up the round-robin scheduler in firmware/hardware with
>   the round-robin airtime scheduler.
>
> - If several stations are eligible for transmission we can schedule both of
>   them; no need to hard-block the scheduling rotation until the head of the
>   queue has used up its quantum.
>
> - The check of whether a station is eligible for transmission becomes
>   simpler (in ieee80211_txq_may_transmit()).
>
> The drawback is that scheduling becomes slightly more expensive, as we need
> to maintain an rbtree of TXQs sorted by virtual time. This means that
> ieee80211_register_airtime() becomes O(logN) in the number of currently
> scheduled TXQs. However, hopefully this number rarely grows too big (it's
> only TXQs currently backlogged, not all associated stations), so it
> shouldn't be too big of an issue.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>

I'll note that this patch still has the two issues that Felix pointed
out when I posted the RFC version. Namely:

- The use of divisions in the fast path. I guess I need to go write some
  reciprocal-calculation code, since that is also an issue with the AQL
  patches I linked to before.

- The fact that we don't count the airtime usage of multicast traffic,
  which with this series means that the vif TXQ will get priority over
  the others. I think we agreed to fix this by just adding an airtime
  v_t to the vif as well and use that for scheduling the TXQ. Does
  ath10k report airtime usage for multicast as well, or only for
  stations?


-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-17 21:12         ` Toke Høiland-Jørgensen
@ 2019-09-18 10:02           ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:02 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Johannes Berg, ath10k, linux-wireless, linux-wireless-owner

On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-16 23:27, Johannes Berg wrote:
>>> Without really looking at the code -
>>> 
>>>> If station is ineligible for transmission in
>>>> ieee80211_txq_may_transmit(),
>>>> no packet will be delivered to FW. During the tests in push-pull 
>>>> mode
>>>> with
>>>> many clients, after several seconds, not a single station is an
>>>> eligible
>>>> candidate for transmission since global time is smaller than all the
>>>> station's virtual airtime. As a consequence, the Tx has been blocked
>>>> and
>>>> throughput is quite low.
>>> 
>>> You should rewrite this to be, erm, a bit more understandable in
>>> mac80211 context. I assume you're speaking (mostly?) about ath10k, 
>>> but
>>> I
>>> have very little context there. "push pull mode"? "firmware"? These
>>> things are not something mac80211 knows about.
>> Hi Johannes,
>> 
>> Thanks for your kindly reminder. Will rewrite the commit log.
>> 
>>> 
>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>> 
>>> That also seems wrong, should be Toke I guess, unless you intended 
>>> for
>>> a
>>> From: Toke to be present?
>> Do you mean it should be something like:
>> 
>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> 
>> Am I understanding right?
> 
> I think the right thing here, as with the previous patch, is to just
> drop my sign-off; you're writing this patch, and I'll add ack/reviews 
> as
> appropriate. And in that case, well, no need to have co-developed-by
> yourself when your name is on the patch as author :)
> 
> -Toke
Sorry, I think I missed your reply; please ignore the wrong sign-off in
PATCH-V2.

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-18 10:02           ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:02 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, Johannes Berg, linux-wireless, ath10k

On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-16 23:27, Johannes Berg wrote:
>>> Without really looking at the code -
>>> 
>>>> If station is ineligible for transmission in
>>>> ieee80211_txq_may_transmit(),
>>>> no packet will be delivered to FW. During the tests in push-pull 
>>>> mode
>>>> with
>>>> many clients, after several seconds, not a single station is an
>>>> eligible
>>>> candidate for transmission since global time is smaller than all the
>>>> station's virtual airtime. As a consequence, the Tx has been blocked
>>>> and
>>>> throughput is quite low.
>>> 
>>> You should rewrite this to be, erm, a bit more understandable in
>>> mac80211 context. I assume you're speaking (mostly?) about ath10k, 
>>> but
>>> I
>>> have very little context there. "push pull mode"? "firmware"? These
>>> things are not something mac80211 knows about.
>> Hi Johannes,
>> 
>> Thanks for your kindly reminder. Will rewrite the commit log.
>> 
>>> 
>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>> 
>>> That also seems wrong, should be Toke I guess, unless you intended 
>>> for
>>> a
>>> From: Toke to be present?
>> Do you mean it should be something like:
>> 
>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> 
>> Am I understanding right?
> 
> I think the right thing here, as with the previous patch, is to just
> drop my sign-off; you're writing this patch, and I'll add ack/reviews 
> as
> appropriate. And in that case, well, no need to have co-developed-by
> yourself when your name is on the patch as author :)
> 
> -Toke
Sorry, I think I missed your reply; please ignore the wrong sign-off in
PATCH-V2.

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-18 10:02           ` Yibo Zhao
@ 2019-09-18 10:16             ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-18 10:16 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: Johannes Berg, ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-16 23:27, Johannes Berg wrote:
>>>> Without really looking at the code -
>>>> 
>>>>> If station is ineligible for transmission in
>>>>> ieee80211_txq_may_transmit(),
>>>>> no packet will be delivered to FW. During the tests in push-pull 
>>>>> mode
>>>>> with
>>>>> many clients, after several seconds, not a single station is an
>>>>> eligible
>>>>> candidate for transmission since global time is smaller than all the
>>>>> station's virtual airtime. As a consequence, the Tx has been blocked
>>>>> and
>>>>> throughput is quite low.
>>>> 
>>>> You should rewrite this to be, erm, a bit more understandable in
>>>> mac80211 context. I assume you're speaking (mostly?) about ath10k, 
>>>> but
>>>> I
>>>> have very little context there. "push pull mode"? "firmware"? These
>>>> things are not something mac80211 knows about.
>>> Hi Johannes,
>>> 
>>> Thanks for your kindly reminder. Will rewrite the commit log.
>>> 
>>>> 
>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> 
>>>> That also seems wrong, should be Toke I guess, unless you intended 
>>>> for
>>>> a
>>>> From: Toke to be present?
>>> Do you mean it should be something like:
>>> 
>>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> 
>>> Am I understanding right?
>> 
>> I think the right thing here, as with the previous patch, is to just
>> drop my sign-off; you're writing this patch, and I'll add ack/reviews 
>> as
>> appropriate. And in that case, well, no need to have co-developed-by
>> yourself when your name is on the patch as author :)
>> 
>> -Toke
> Sorry, I think I missed your reply; please ignore the wrong sign-off in
> PATCH-V2.

While you're re-spinning, could you please add a changelog for the
changes you make? Makes it easier to keep track :)

You can add a cover-letter with a full changelog instead of having a
separate changelog for each patch; that's what I usually do...

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-18 10:16             ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-18 10:16 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, Johannes Berg, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-16 23:27, Johannes Berg wrote:
>>>> Without really looking at the code -
>>>> 
>>>>> If station is ineligible for transmission in
>>>>> ieee80211_txq_may_transmit(),
>>>>> no packet will be delivered to FW. During the tests in push-pull 
>>>>> mode
>>>>> with
>>>>> many clients, after several seconds, not a single station is an
>>>>> eligible
>>>>> candidate for transmission since global time is smaller than all the
>>>>> station's virtual airtime. As a consequence, the Tx has been blocked
>>>>> and
>>>>> throughput is quite low.
>>>> 
>>>> You should rewrite this to be, erm, a bit more understandable in
>>>> mac80211 context. I assume you're speaking (mostly?) about ath10k, 
>>>> but
>>>> I
>>>> have very little context there. "push pull mode"? "firmware"? These
>>>> things are not something mac80211 knows about.
>>> Hi Johannes,
>>> 
>>> Thanks for your kind reminder. Will rewrite the commit log.
>>> 
>>>> 
>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> 
>>>> That also seems wrong, should be Toke I guess, unless you intended 
>>>> for
>>>> a
>>>> From: Toke to be present?
>>> Do you mean it should be something like:
>>> 
>>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> 
>>> Am I understanding right?
>> 
>> I think the right thing here, as with the previous patch, is to just
>> drop my sign-off; you're writing this patch, and I'll add ack/reviews 
>> as
>> appropriate. And in that case, well, no need to have co-developed-by
>> yourself when your name is on the patch as author :)
>> 
>> -Toke
> Sorry, I think I have missed checking your reply, please ignore the 
> wrong signed-off in PATCH-V2.

While you're re-spinning, could you please add a changelog for the
changes you make? Makes it easier to keep track :)

You can add a cover-letter with a full changelog instead of having a
separate changelog for each patch; that's what I usually do...

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-09-17 21:24     ` Toke Høiland-Jørgensen
@ 2019-09-18 10:16       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:16 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-18 05:24, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> Global airtime weight sum is updated only when txq is added/removed
>> from rbtree. If upper layer configures sta weight during high load,
>> airtime weight sum will not be updated since txq is most likely on the
>> tree. It could be a little late for upper layer to reconfigure sta weight
>> when txq is already in the rbtree. And thus, incorrect airtime weight 
>> sum
>> will lead to incorrect global virtual time calculation as well as
>> overflow of the global airtime weight sum when a txq is removed.
>> 
>> Hence, need to update airtime weight sum upon receiving event for
>> configuring sta weight once sta's txq is on the rbtree.
>> 
>> Besides, if airtime weight sum of ACs and sta weight is synced under 
>> the
>> same per AC lock protection, there can be a very short window causing
>> incorrect airtime weight sum calculation as below:
>> 
>>     active_txq_lock_VO                          .
>>     VO weight sum is syncd			.
>>     sta airtime weight sum is synced		.
>>     active_txq_unlock_VO			.
>>     .						.
>>     active_txq_lock_VI    			.
>>     VI weight sum is syncd			.
>>     sta airtime weight sum		active_txq_lock_BE
>>     active_txq_unlock_VI	      Remove txq and thus sum
>>     .				      is calculated with synced
>>     .				      sta airtime weight
>>     .					active_txq_unlock_BE
>> 
>> So introduce a per ac synced station airtime weight synced with per
>> AC synced weight sum together. And the per-AC station airtime weight
>> is used to calculate weight sum.
>> 
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> ---
>>  net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
>>  net/mac80211/sta_info.c |  6 ++++--
>>  net/mac80211/sta_info.h |  3 +++
>>  net/mac80211/tx.c       |  4 ++--
>>  4 files changed, 34 insertions(+), 6 deletions(-)
>> 
>> diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
>> index d65aa01..4b420bb 100644
>> --- a/net/mac80211/cfg.c
>> +++ b/net/mac80211/cfg.c
>> @@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct 
>> ieee80211_local *local,
>>  	int ret = 0;
>>  	struct ieee80211_supported_band *sband;
>>  	struct ieee80211_sub_if_data *sdata = sta->sdata;
>> -	u32 mask, set;
>> +	u32 mask, set, tid, ac;
>> +	struct txq_info *txqi;
>> 
>>  	sband = ieee80211_get_sband(sdata);
>>  	if (!sband)
>> @@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct 
>> ieee80211_local *local,
>>  	if (ieee80211_vif_is_mesh(&sdata->vif))
>>  		sta_apply_mesh_params(local, sta, params);
>> 
>> -	if (params->airtime_weight)
>> +	if (params->airtime_weight &&
>> +	    params->airtime_weight != sta->airtime_weight) {
>> +		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>> +			spin_lock_bh(&local->active_txq_lock[ac]);
>> +			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
>> +				if (!sta->sta.txq[tid] ||
>> +				    ac != ieee80211_ac_from_tid(tid))
>> +					continue;
>> +
>> +				sta->airtime_weight_synced[ac] =
>> +							params->airtime_weight;
>> +
>> +				txqi = to_txq_info(sta->sta.txq[tid]);
>> +				if (RB_EMPTY_NODE(&txqi->schedule_order))
>> +					continue;
>> +
>> +				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
>> +								params->airtime_weight -
>> +								sta->airtime_weight;
>> +			}
>> +			spin_unlock_bh(&local->active_txq_lock[ac]);
>> +		}
>>  		sta->airtime_weight = params->airtime_weight;
> 
> With this, airtime_weight is basically only used to return to and from
> userspace, right? I.e., after the above loop has run, it will match the
> contents of airtime_weight_synced; so why not just turn airtime_weight
> into  a per-ac array? You could just use airtime_weight[0] as the value
> to return to userspace...
Yes, I also feel it is a little weird to keep both of them. I am fine 
with the suggestion.

> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-09-18 10:16       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:16 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-18 05:24, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> Global airtime weight sum is updated only when txq is added/removed
>> from rbtree. If upper layer configures sta weight during high load,
>> airtime weight sum will not be updated since txq is most likely on the
>> tree. It could be a little late for upper layer to reconfigure sta weight
>> when txq is already in the rbtree. And thus, incorrect airtime weight 
>> sum
>> will lead to incorrect global virtual time calculation as well as
>> overflow of the global airtime weight sum when a txq is removed.
>> 
>> Hence, need to update airtime weight sum upon receiving event for
>> configuring sta weight once sta's txq is on the rbtree.
>> 
>> Besides, if airtime weight sum of ACs and sta weight is synced under 
>> the
>> same per AC lock protection, there can be a very short window causing
>> incorrect airtime weight sum calculation as below:
>> 
>>     active_txq_lock_VO                          .
>>     VO weight sum is syncd			.
>>     sta airtime weight sum is synced		.
>>     active_txq_unlock_VO			.
>>     .						.
>>     active_txq_lock_VI    			.
>>     VI weight sum is syncd			.
>>     sta airtime weight sum		active_txq_lock_BE
>>     active_txq_unlock_VI	      Remove txq and thus sum
>>     .				      is calculated with synced
>>     .				      sta airtime weight
>>     .					active_txq_unlock_BE
>> 
>> So introduce a per ac synced station airtime weight synced with per
>> AC synced weight sum together. And the per-AC station airtime weight
>> is used to calculate weight sum.
>> 
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> ---
>>  net/mac80211/cfg.c      | 27 +++++++++++++++++++++++++--
>>  net/mac80211/sta_info.c |  6 ++++--
>>  net/mac80211/sta_info.h |  3 +++
>>  net/mac80211/tx.c       |  4 ++--
>>  4 files changed, 34 insertions(+), 6 deletions(-)
>> 
>> diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
>> index d65aa01..4b420bb 100644
>> --- a/net/mac80211/cfg.c
>> +++ b/net/mac80211/cfg.c
>> @@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct 
>> ieee80211_local *local,
>>  	int ret = 0;
>>  	struct ieee80211_supported_band *sband;
>>  	struct ieee80211_sub_if_data *sdata = sta->sdata;
>> -	u32 mask, set;
>> +	u32 mask, set, tid, ac;
>> +	struct txq_info *txqi;
>> 
>>  	sband = ieee80211_get_sband(sdata);
>>  	if (!sband)
>> @@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct 
>> ieee80211_local *local,
>>  	if (ieee80211_vif_is_mesh(&sdata->vif))
>>  		sta_apply_mesh_params(local, sta, params);
>> 
>> -	if (params->airtime_weight)
>> +	if (params->airtime_weight &&
>> +	    params->airtime_weight != sta->airtime_weight) {
>> +		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>> +			spin_lock_bh(&local->active_txq_lock[ac]);
>> +			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
>> +				if (!sta->sta.txq[tid] ||
>> +				    ac != ieee80211_ac_from_tid(tid))
>> +					continue;
>> +
>> +				sta->airtime_weight_synced[ac] =
>> +							params->airtime_weight;
>> +
>> +				txqi = to_txq_info(sta->sta.txq[tid]);
>> +				if (RB_EMPTY_NODE(&txqi->schedule_order))
>> +					continue;
>> +
>> +				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
>> +								params->airtime_weight -
>> +								sta->airtime_weight;
>> +			}
>> +			spin_unlock_bh(&local->active_txq_lock[ac]);
>> +		}
>>  		sta->airtime_weight = params->airtime_weight;
> 
> With this, airtime_weight is basically only used to return to and from
> userspace, right? I.e., after the above loop has run, it will match the
> contents of airtime_weight_synced; so why not just turn airtime_weight
> into  a per-ac array? You could just use airtime_weight[0] as the value
> to return to userspace...
Yes, I also feel it is a little weird to keep both of them. I am fine 
with the suggestion.

> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
  2019-09-18 10:16             ` Toke Høiland-Jørgensen
@ 2019-09-18 10:18               ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:18 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: Johannes Berg, ath10k, linux-wireless, linux-wireless-owner

On 2019-09-18 18:16, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-16 23:27, Johannes Berg wrote:
>>>>> Without really looking at the code -
>>>>> 
>>>>>> If station is ineligible for transmission in
>>>>>> ieee80211_txq_may_transmit(),
>>>>>> no packet will be delivered to FW. During the tests in push-pull
>>>>>> mode
>>>>>> with
>>>>>> many clients, after several seconds, not a single station is an
>>>>>> eligible
>>>>>> candidate for transmission since global time is smaller than all 
>>>>>> the
>>>>>> station's virtual airtime. As a consequence, the Tx has been 
>>>>>> blocked
>>>>>> and
>>>>>> throughput is quite low.
>>>>> 
>>>>> You should rewrite this to be, erm, a bit more understandable in
>>>>> mac80211 context. I assume you're speaking (mostly?) about ath10k,
>>>>> but
>>>>> I
>>>>> have very little context there. "push pull mode"? "firmware"? These
>>>>> things are not something mac80211 knows about.
>>>> Hi Johannes,
>>>> 
>>>> Thanks for your kind reminder. Will rewrite the commit log.
>>>> 
>>>>> 
>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> 
>>>>> That also seems wrong, should be Toke I guess, unless you intended
>>>>> for
>>>>> a
>>>>> From: Toke to be present?
>>>> Do you mean it should be something like:
>>>> 
>>>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> 
>>>> Am I understanding right?
>>> 
>>> I think the right thing here, as with the previous patch, is to just
>>> drop my sign-off; you're writing this patch, and I'll add ack/reviews
>>> as
>>> appropriate. And in that case, well, no need to have co-developed-by
>>> yourself when your name is on the patch as author :)
>>> 
>>> -Toke
>> Sorry, I think I have missed checking your reply, please ignore the
>> wrong signed-off in PATCH-V2.
> 
> While you're re-spinning, could you please add a changelog for the
> changes you make? Makes it easier to keep track :)
> 
> You can add a cover-letter with a full changelog instead of having a
> separate changelog for each patch; that's what I usually do...
> 
> -Toke
Sure, thanks.
-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 3/4] mac80211: fix low throughput in push pull mode
@ 2019-09-18 10:18               ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:18 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, Johannes Berg, linux-wireless, ath10k

On 2019-09-18 18:16, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 05:12, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-16 23:27, Johannes Berg wrote:
>>>>> Without really looking at the code -
>>>>> 
>>>>>> If station is ineligible for transmission in
>>>>>> ieee80211_txq_may_transmit(),
>>>>>> no packet will be delivered to FW. During the tests in push-pull
>>>>>> mode
>>>>>> with
>>>>>> many clients, after several seconds, not a single station is an
>>>>>> eligible
>>>>>> candidate for transmission since global time is smaller than all 
>>>>>> the
>>>>>> station's virtual airtime. As a consequence, the Tx has been 
>>>>>> blocked
>>>>>> and
>>>>>> throughput is quite low.
>>>>> 
>>>>> You should rewrite this to be, erm, a bit more understandable in
>>>>> mac80211 context. I assume you're speaking (mostly?) about ath10k,
>>>>> but
>>>>> I
>>>>> have very little context there. "push pull mode"? "firmware"? These
>>>>> things are not something mac80211 knows about.
>>>> Hi Johannes,
>>>> 
>>>> Thanks for your kind reminder. Will rewrite the commit log.
>>>> 
>>>>> 
>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> 
>>>>> That also seems wrong, should be Toke I guess, unless you intended
>>>>> for
>>>>> a
>>>>> From: Toke to be present?
>>>> Do you mean it should be something like:
>>>> 
>>>> Co-developed-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> 
>>>> Am I understanding right?
>>> 
>>> I think the right thing here, as with the previous patch, is to just
>>> drop my sign-off; you're writing this patch, and I'll add ack/reviews
>>> as
>>> appropriate. And in that case, well, no need to have co-developed-by
>>> yourself when your name is on the patch as author :)
>>> 
>>> -Toke
>> Sorry, I think I have missed checking your reply, please ignore the
>> wrong signed-off in PATCH-V2.
> 
> While you're re-spinning, could you please add a changelog for the
> changes you make? Makes it easier to keep track :)
> 
> You can add a cover-letter with a full changelog instead of having a
> separate changelog for each patch; that's what I usually do...
> 
> -Toke
Sure, thanks.
-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-17 21:10     ` Toke Høiland-Jørgensen
@ 2019-09-18 10:27       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:27 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: ath10k, linux-wireless

On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>> removed from rbtree immediately in the ieee80211_return_txq(), the
>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>> not leading to the second txq in the rbtree. Thus, deferring the
>> removal right before the end of this schedule round.
>> 
>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> 
> I didn't write this patch, so please don't use my sign-off. I'll add
> ack or review tags as appropriate in reply; but a few comments first:
> 
>> ---
>>  include/net/mac80211.h     | 16 ++++++++++--
>>  net/mac80211/ieee80211_i.h |  3 +++
>>  net/mac80211/main.c        |  6 +++++
>>  net/mac80211/tx.c          | 63 
>> +++++++++++++++++++++++++++++++++++++++++++---
>>  4 files changed, 83 insertions(+), 5 deletions(-)
>> 
>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>> index ac2ed8e..ba5a345 100644
>> --- a/include/net/mac80211.h
>> +++ b/include/net/mac80211.h
>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>> 
>>  #define IEEE80211_MAX_TX_RETRY		31
>> 
>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>> +
>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate 
>> *rate,
>>  					  u8 mcs, u8 nss)
>>  {
>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct 
>> ieee80211_hw *hw,
>>   * @ac: AC number to return packets from.
>>   *
>>   * Should only be called between calls to 
>> ieee80211_txq_schedule_start()
>> - * and ieee80211_txq_schedule_end().
>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be 
>> added
>> + * to a remove list and get removed later.
>>   * Returns the next txq if successful, %NULL if no queue is eligible. 
>> If a txq
>>   * is returned, it should be returned with ieee80211_return_txq() 
>> after the
>>   * driver has finished scheduling it.
>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct 
>> ieee80211_hw *hw, u8 ac)
>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>   * @ac: AC number to acquire locks for
>>   *
>> - * Release locks previously acquired by ieee80211_txq_schedule_end().
>> + * Release locks previously acquired by ieee80211_txq_schedule_end(). 
>> Check
>> + * and remove the empty txq from rb-tree.
>>   */
>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>  	__releases(txq_lock);
>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>> *hw, struct ieee80211_txq *txq)
>>  	__acquires(txq_lock) __releases(txq_lock);
>> 
>>  /**
>> + * ieee80211_txqs_check - Check txqs waiting for removal
>> + *
>> + * @tmr: pointer as obtained from local
>> + *
>> + */
>> +void ieee80211_txqs_check(struct timer_list *tmr);
>> +
>> +/**
>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to 
>> transmit
>>   *
>>   * This function is used to check whether given txq is allowed to 
>> transmit by
>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>> index a4556f9..49aa143e 100644
>> --- a/net/mac80211/ieee80211_i.h
>> +++ b/net/mac80211/ieee80211_i.h
>> @@ -847,6 +847,7 @@ struct txq_info {
>>  	struct codel_stats cstats;
>>  	struct sk_buff_head frags;
>>  	struct rb_node schedule_order;
>> +	struct list_head candidate;
>>  	unsigned long flags;
>> 
>>  	/* keep last! */
>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>> 
>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>> +	struct timer_list remove_timer;
>>  	u16 airtime_flags;
>> 
>>  	const struct ieee80211_ops *ops;
>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>> index e9ffa8e..78fe24a 100644
>> --- a/net/mac80211/main.c
>> +++ b/net/mac80211/main.c
>> @@ -667,10 +667,15 @@ struct ieee80211_hw 
>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>> 
>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>  		spin_lock_init(&local->active_txq_lock[i]);
>>  	}
>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>> 
>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>> +	mod_timer(&local->remove_timer,
>> +		  jiffies + 
>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>> +
>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>  	mutex_init(&local->chanctx_mtx);
>> 
>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw 
>> *hw)
>>  	tasklet_kill(&local->tx_pending_tasklet);
>>  	tasklet_kill(&local->tasklet);
>> 
>> +	del_timer_sync(&local->remove_timer);
>>  #ifdef CONFIG_INET
>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>  #endif
>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>> index d00baaa..42ca010 100644
>> --- a/net/mac80211/tx.c
>> +++ b/net/mac80211/tx.c
>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct 
>> ieee80211_sub_if_data *sdata,
>>  	codel_stats_init(&txqi->cstats);
>>  	__skb_queue_head_init(&txqi->frags);
>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>> +	INIT_LIST_HEAD(&txqi->candidate);
>> 
>>  	txqi->txq.vif = &sdata->vif;
>> 
>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>> *hw,
>> 
>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>> 
>> +	if (!list_empty(&txqi->candidate))
>> +		list_del_init(&txqi->candidate);
>> +
>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>  		goto out;
>> 
>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct 
>> ieee80211_hw *hw,
>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>  }
>> 
>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>> +			  struct ieee80211_txq *txq)
>> +{
>> +	struct ieee80211_local *local = hw_to_local(hw);
>> +	struct txq_info *txqi = to_txq_info(txq);
>> +
>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>> +
>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>> +		__ieee80211_unschedule_txq(hw, txq);
>> +		list_del_init(&txqi->candidate);
>> +	}
>> +}
>> +
>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>  			      struct ieee80211_txq *txq)
>>  	__acquires(txq_lock) __releases(txq_lock)
>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct 
>> ieee80211_hw *hw,
>>  	struct ieee80211_local *local = hw_to_local(hw);
>> 
>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>> -	__ieee80211_unschedule_txq(hw, txq);
>> +	ieee80211_remove_txq(hw, txq);
>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>  }
>> 
>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw 
>> *hw,
>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>> 
>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>> -		__ieee80211_unschedule_txq(hw, txq);
>> +		!txq_has_queue(&txqi->txq) &&
>> +		list_empty(&txqi->candidate))
>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>> +
>>  }
>>  EXPORT_SYMBOL(ieee80211_return_txq);
>> 
>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>> +{
>> +	struct txq_info *iter, *tmp;
>> +	struct sta_info *sta;
>> +
>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>> +
>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>> +				 candidate) {
>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>> +
>> +		if (txq_has_queue(&iter->txq))
>> +			list_del_init(&iter->candidate);
>> +		else
>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>> +	}
>> +}
>> +
>> +void ieee80211_txqs_check(struct timer_list *t)
>> +{
>> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
>> +	struct txq_info *iter, *tmp;
>> +	struct sta_info *sta;
>> +	int ac;
>> +
>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>> +		__ieee80211_check_txqs(local, ac);
>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>> +	}
>> +
>> +	mod_timer(&local->remove_timer,
>> +		  jiffies + 
>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>> +}
> 
> I'll ask the same as I did last time (where you told me to hold off
> until this round):
> 
> Why do you need the timer and the periodic check? If TXQs are added to
> the remove list during the scheduling run, and __ieee80211_check_txqs()
> is run from schedule_end(), isn't that sufficient to clear the list?
Is it possible that a txq is not added to the remove list but then
packets in it are dropped by the fq_codel algorithm? For example, when
the station disconnects without any notification.

> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-18 10:27       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-18 10:27 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: linux-wireless, ath10k

On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>> removed from rbtree immediately in the ieee80211_return_txq(), the
>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>> not leading to the second txq in the rbtree. Thus, deferring the
>> removal right before the end of this schedule round.
>> 
>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> 
> I didn't write this patch, so please don't use my sign-off. I'll add
> ack or review tags as appropriate in reply; but a few comments first:
> 
>> ---
>>  include/net/mac80211.h     | 16 ++++++++++--
>>  net/mac80211/ieee80211_i.h |  3 +++
>>  net/mac80211/main.c        |  6 +++++
>>  net/mac80211/tx.c          | 63 
>> +++++++++++++++++++++++++++++++++++++++++++---
>>  4 files changed, 83 insertions(+), 5 deletions(-)
>> 
>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>> index ac2ed8e..ba5a345 100644
>> --- a/include/net/mac80211.h
>> +++ b/include/net/mac80211.h
>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>> 
>>  #define IEEE80211_MAX_TX_RETRY		31
>> 
>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>> +
>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate 
>> *rate,
>>  					  u8 mcs, u8 nss)
>>  {
>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct 
>> ieee80211_hw *hw,
>>   * @ac: AC number to return packets from.
>>   *
>>   * Should only be called between calls to 
>> ieee80211_txq_schedule_start()
>> - * and ieee80211_txq_schedule_end().
>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be 
>> added
>> + * to a remove list and get removed later.
>>   * Returns the next txq if successful, %NULL if no queue is eligible. 
>> If a txq
>>   * is returned, it should be returned with ieee80211_return_txq() 
>> after the
>>   * driver has finished scheduling it.
>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct 
>> ieee80211_hw *hw, u8 ac)
>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>   * @ac: AC number to acquire locks for
>>   *
>> - * Release locks previously acquired by ieee80211_txq_schedule_end().
>> + * Release locks previously acquired by ieee80211_txq_schedule_end(). 
>> Check
>> + * and remove the empty txq from rb-tree.
>>   */
>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>  	__releases(txq_lock);
>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>> *hw, struct ieee80211_txq *txq)
>>  	__acquires(txq_lock) __releases(txq_lock);
>> 
>>  /**
>> + * ieee80211_txqs_check - Check txqs waiting for removal
>> + *
>> + * @tmr: pointer as obtained from local
>> + *
>> + */
>> +void ieee80211_txqs_check(struct timer_list *tmr);
>> +
>> +/**
>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to 
>> transmit
>>   *
>>   * This function is used to check whether given txq is allowed to 
>> transmit by
>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>> index a4556f9..49aa143e 100644
>> --- a/net/mac80211/ieee80211_i.h
>> +++ b/net/mac80211/ieee80211_i.h
>> @@ -847,6 +847,7 @@ struct txq_info {
>>  	struct codel_stats cstats;
>>  	struct sk_buff_head frags;
>>  	struct rb_node schedule_order;
>> +	struct list_head candidate;
>>  	unsigned long flags;
>> 
>>  	/* keep last! */
>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>> 
>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>> +	struct timer_list remove_timer;
>>  	u16 airtime_flags;
>> 
>>  	const struct ieee80211_ops *ops;
>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>> index e9ffa8e..78fe24a 100644
>> --- a/net/mac80211/main.c
>> +++ b/net/mac80211/main.c
>> @@ -667,10 +667,15 @@ struct ieee80211_hw 
>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>> 
>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>  		spin_lock_init(&local->active_txq_lock[i]);
>>  	}
>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>> 
>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>> +	mod_timer(&local->remove_timer,
>> +		  jiffies + 
>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>> +
>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>  	mutex_init(&local->chanctx_mtx);
>> 
>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw 
>> *hw)
>>  	tasklet_kill(&local->tx_pending_tasklet);
>>  	tasklet_kill(&local->tasklet);
>> 
>> +	del_timer_sync(&local->remove_timer);
>>  #ifdef CONFIG_INET
>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>  #endif
>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>> index d00baaa..42ca010 100644
>> --- a/net/mac80211/tx.c
>> +++ b/net/mac80211/tx.c
>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct 
>> ieee80211_sub_if_data *sdata,
>>  	codel_stats_init(&txqi->cstats);
>>  	__skb_queue_head_init(&txqi->frags);
>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>> +	INIT_LIST_HEAD(&txqi->candidate);
>> 
>>  	txqi->txq.vif = &sdata->vif;
>> 
>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>> *hw,
>> 
>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>> 
>> +	if (!list_empty(&txqi->candidate))
>> +		list_del_init(&txqi->candidate);
>> +
>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>  		goto out;
>> 
>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct 
>> ieee80211_hw *hw,
>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>  }
>> 
>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>> +			  struct ieee80211_txq *txq)
>> +{
>> +	struct ieee80211_local *local = hw_to_local(hw);
>> +	struct txq_info *txqi = to_txq_info(txq);
>> +
>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>> +
>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>> +		__ieee80211_unschedule_txq(hw, txq);
>> +		list_del_init(&txqi->candidate);
>> +	}
>> +}
>> +
>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>  			      struct ieee80211_txq *txq)
>>  	__acquires(txq_lock) __releases(txq_lock)
>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct 
>> ieee80211_hw *hw,
>>  	struct ieee80211_local *local = hw_to_local(hw);
>> 
>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>> -	__ieee80211_unschedule_txq(hw, txq);
>> +	ieee80211_remove_txq(hw, txq);
>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>  }
>> 
>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw 
>> *hw,
>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>> 
>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>> -		__ieee80211_unschedule_txq(hw, txq);
>> +		!txq_has_queue(&txqi->txq) &&
>> +		list_empty(&txqi->candidate))
>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>> +
>>  }
>>  EXPORT_SYMBOL(ieee80211_return_txq);
>> 
>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>> +{
>> +	struct txq_info *iter, *tmp;
>> +	struct sta_info *sta;
>> +
>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>> +
>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>> +				 candidate) {
>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>> +
>> +		if (txq_has_queue(&iter->txq))
>> +			list_del_init(&iter->candidate);
>> +		else
>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>> +	}
>> +}
>> +
>> +void ieee80211_txqs_check(struct timer_list *t)
>> +{
>> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
>> +	struct txq_info *iter, *tmp;
>> +	struct sta_info *sta;
>> +	int ac;
>> +
>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>> +		__ieee80211_check_txqs(local, ac);
>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>> +	}
>> +
>> +	mod_timer(&local->remove_timer,
>> +		  jiffies + 
>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>> +}
> 
> I'll ask the same as I did last time (where you told me to hold off
> until this round):
> 
> Why do you need the timer and the periodic check? If TXQs are added to
> the remove list during the scheduling run, and __ieee80211_check_txqs()
> is run from schedule_end(), isn't that sufficient to clear the list?
Is it possible that a txq is not added to the remove list but then
packets in it are dropped by the fq_codel algorithm? For example, when
the station disconnects without any notification.

> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-18 10:27       ` Yibo Zhao
@ 2019-09-18 11:23         ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-18 11:23 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>> not leading to the second txq in the rbtree. Thus, defering the
>>> removal right before the end of this schedule round.
>>> 
>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> 
>> I didn't write this patch, so please don't use my sign-off. I'll add
>> ack or review tags as appropriate in reply; but a few comments first:
>> 
>>> ---
>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>  net/mac80211/main.c        |  6 +++++
>>>  net/mac80211/tx.c          | 63 
>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>> 
>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>> index ac2ed8e..ba5a345 100644
>>> --- a/include/net/mac80211.h
>>> +++ b/include/net/mac80211.h
>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>> 
>>>  #define IEEE80211_MAX_TX_RETRY		31
>>> 
>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>> +
>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate 
>>> *rate,
>>>  					  u8 mcs, u8 nss)
>>>  {
>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct 
>>> ieee80211_hw *hw,
>>>   * @ac: AC number to return packets from.
>>>   *
>>>   * Should only be called between calls to 
>>> ieee80211_txq_schedule_start()
>>> - * and ieee80211_txq_schedule_end().
>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be 
>>> added
>>> + * to a remove list and get removed later.
>>>   * Returns the next txq if successful, %NULL if no queue is eligible. 
>>> If a txq
>>>   * is returned, it should be returned with ieee80211_return_txq() 
>>> after the
>>>   * driver has finished scheduling it.
>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct 
>>> ieee80211_hw *hw, u8 ac)
>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>   * @ac: AC number to acquire locks for
>>>   *
>>> - * Release locks previously acquired by ieee80211_txq_schedule_end().
>>> + * Release locks previously acquired by ieee80211_txq_schedule_end(). 
>>> Check
>>> + * and remove the empty txq from rb-tree.
>>>   */
>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>  	__releases(txq_lock);
>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>>> *hw, struct ieee80211_txq *txq)
>>>  	__acquires(txq_lock) __releases(txq_lock);
>>> 
>>>  /**
>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>> + *
>>> + * @tmr: pointer as obtained from local
>>> + *
>>> + */
>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>> +
>>> +/**
>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to 
>>> transmit
>>>   *
>>>   * This function is used to check whether given txq is allowed to 
>>> transmit by
>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>> index a4556f9..49aa143e 100644
>>> --- a/net/mac80211/ieee80211_i.h
>>> +++ b/net/mac80211/ieee80211_i.h
>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>  	struct codel_stats cstats;
>>>  	struct sk_buff_head frags;
>>>  	struct rb_node schedule_order;
>>> +	struct list_head candidate;
>>>  	unsigned long flags;
>>> 
>>>  	/* keep last! */
>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>> 
>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>> +	struct timer_list remove_timer;
>>>  	u16 airtime_flags;
>>> 
>>>  	const struct ieee80211_ops *ops;
>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>> index e9ffa8e..78fe24a 100644
>>> --- a/net/mac80211/main.c
>>> +++ b/net/mac80211/main.c
>>> @@ -667,10 +667,15 @@ struct ieee80211_hw 
>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>> 
>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>  	}
>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>> 
>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>> +	mod_timer(&local->remove_timer,
>>> +		  jiffies + 
>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>> +
>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>  	mutex_init(&local->chanctx_mtx);
>>> 
>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw 
>>> *hw)
>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>  	tasklet_kill(&local->tasklet);
>>> 
>>> +	del_timer_sync(&local->remove_timer);
>>>  #ifdef CONFIG_INET
>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>  #endif
>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>> index d00baaa..42ca010 100644
>>> --- a/net/mac80211/tx.c
>>> +++ b/net/mac80211/tx.c
>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct 
>>> ieee80211_sub_if_data *sdata,
>>>  	codel_stats_init(&txqi->cstats);
>>>  	__skb_queue_head_init(&txqi->frags);
>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>> 
>>>  	txqi->txq.vif = &sdata->vif;
>>> 
>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>>> *hw,
>>> 
>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>> 
>>> +	if (!list_empty(&txqi->candidate))
>>> +		list_del_init(&txqi->candidate);
>>> +
>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>  		goto out;
>>> 
>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct 
>>> ieee80211_hw *hw,
>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>  }
>>> 
>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>> +			  struct ieee80211_txq *txq)
>>> +{
>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>> +	struct txq_info *txqi = to_txq_info(txq);
>>> +
>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>> +
>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>> +		__ieee80211_unschedule_txq(hw, txq);
>>> +		list_del_init(&txqi->candidate);
>>> +	}
>>> +}
>>> +
>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>  			      struct ieee80211_txq *txq)
>>>  	__acquires(txq_lock) __releases(txq_lock)
>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct 
>>> ieee80211_hw *hw,
>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>> 
>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>> -	__ieee80211_unschedule_txq(hw, txq);
>>> +	ieee80211_remove_txq(hw, txq);
>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>  }
>>> 
>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw 
>>> *hw,
>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>> 
>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>> -		__ieee80211_unschedule_txq(hw, txq);
>>> +		!txq_has_queue(&txqi->txq) &&
>>> +		list_empty(&txqi->candidate))
>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>> +
>>>  }
>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>> 
>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>> +{
>>> +	struct txq_info *iter, *tmp;
>>> +	struct sta_info *sta;
>>> +
>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>> +
>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>> +				 candidate) {
>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>> +
>>> +		if (txq_has_queue(&iter->txq))
>>> +			list_del_init(&iter->candidate);
>>> +		else
>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>> +	}
>>> +}
>>> +
>>> +void ieee80211_txqs_check(struct timer_list *t)
>>> +{
>>> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
>>> +	struct txq_info *iter, *tmp;
>>> +	struct sta_info *sta;
>>> +	int ac;
>>> +
>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>> +		__ieee80211_check_txqs(local, ac);
>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>> +	}
>>> +
>>> +	mod_timer(&local->remove_timer,
>>> +		  jiffies + 
>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>> +}
>> 
>> I'll ask the same as I did last time (where you told me to hold off
>> until this round):
>> 
>> Why do you need the timer and the periodic check? If TXQs are added to
>> the remove list during the scheduling run, and __ieee80211_check_txqs()
>> is run from schedule_end(), isn't that sufficient to clear the list?
> Is it possible that a txq is not added to the remove list but then 
> packets in it are dropped by fq_codel algo? Like the station disconnects 
> without any notification.

Well as long as all the other cleanup paths call directly into
__unschedule_txq(), that should remove stations from the scheduler when
they disconnect etc.

We only need to defer removal inside a single "scheduling round" (i.e.,
between a pair of ieee80211_txq_schedule_start/end calls). So if we just
walk the remove list in schedule_end(), that should be enough, no?

Hmm, or maybe a simpler way to fix the original issue is just to have
unschedule_txq() update the schedule_pos pointer?

I.e., unschedule_txq checks if the txq being removed is currently being
pointed to by schedule_pos[ac], and if it is, it updates schedule_pos to
be the rb_next of the current value?

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-18 11:23         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-18 11:23 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>> not leading to the second txq in the rbtree. Thus, defering the
>>> removal right before the end of this schedule round.
>>> 
>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> 
>> I didn't write this patch, so please don't use my sign-off. I'll add
>> ack or review tags as appropriate in reply; but a few comments first:
>> 
>>> ---
>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>  net/mac80211/main.c        |  6 +++++
>>>  net/mac80211/tx.c          | 63 
>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>> 
>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>> index ac2ed8e..ba5a345 100644
>>> --- a/include/net/mac80211.h
>>> +++ b/include/net/mac80211.h
>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>> 
>>>  #define IEEE80211_MAX_TX_RETRY		31
>>> 
>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>> +
>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate 
>>> *rate,
>>>  					  u8 mcs, u8 nss)
>>>  {
>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct 
>>> ieee80211_hw *hw,
>>>   * @ac: AC number to return packets from.
>>>   *
>>>   * Should only be called between calls to 
>>> ieee80211_txq_schedule_start()
>>> - * and ieee80211_txq_schedule_end().
>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will be 
>>> added
>>> + * to a remove list and get removed later.
>>>   * Returns the next txq if successful, %NULL if no queue is eligible. 
>>> If a txq
>>>   * is returned, it should be returned with ieee80211_return_txq() 
>>> after the
>>>   * driver has finished scheduling it.
>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct 
>>> ieee80211_hw *hw, u8 ac)
>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>   * @ac: AC number to acquire locks for
>>>   *
>>> - * Release locks previously acquired by ieee80211_txq_schedule_end().
>>> + * Release locks previously acquired by ieee80211_txq_schedule_end(). 
>>> Check
>>> + * and remove the empty txq from rb-tree.
>>>   */
>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>  	__releases(txq_lock);
>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>>> *hw, struct ieee80211_txq *txq)
>>>  	__acquires(txq_lock) __releases(txq_lock);
>>> 
>>>  /**
>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>> + *
>>> + * @tmr: pointer as obtained from local
>>> + *
>>> + */
>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>> +
>>> +/**
>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to 
>>> transmit
>>>   *
>>>   * This function is used to check whether given txq is allowed to 
>>> transmit by
>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>> index a4556f9..49aa143e 100644
>>> --- a/net/mac80211/ieee80211_i.h
>>> +++ b/net/mac80211/ieee80211_i.h
>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>  	struct codel_stats cstats;
>>>  	struct sk_buff_head frags;
>>>  	struct rb_node schedule_order;
>>> +	struct list_head candidate;
>>>  	unsigned long flags;
>>> 
>>>  	/* keep last! */
>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>> 
>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>> +	struct timer_list remove_timer;
>>>  	u16 airtime_flags;
>>> 
>>>  	const struct ieee80211_ops *ops;
>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>> index e9ffa8e..78fe24a 100644
>>> --- a/net/mac80211/main.c
>>> +++ b/net/mac80211/main.c
>>> @@ -667,10 +667,15 @@ struct ieee80211_hw 
>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>> 
>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>  	}
>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>> 
>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>> +	mod_timer(&local->remove_timer,
>>> +		  jiffies + 
>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>> +
>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>  	mutex_init(&local->chanctx_mtx);
>>> 
>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw 
>>> *hw)
>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>  	tasklet_kill(&local->tasklet);
>>> 
>>> +	del_timer_sync(&local->remove_timer);
>>>  #ifdef CONFIG_INET
>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>  #endif
>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>> index d00baaa..42ca010 100644
>>> --- a/net/mac80211/tx.c
>>> +++ b/net/mac80211/tx.c
>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct 
>>> ieee80211_sub_if_data *sdata,
>>>  	codel_stats_init(&txqi->cstats);
>>>  	__skb_queue_head_init(&txqi->frags);
>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>> 
>>>  	txqi->txq.vif = &sdata->vif;
>>> 
>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct ieee80211_hw 
>>> *hw,
>>> 
>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>> 
>>> +	if (!list_empty(&txqi->candidate))
>>> +		list_del_init(&txqi->candidate);
>>> +
>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>  		goto out;
>>> 
>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct 
>>> ieee80211_hw *hw,
>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>  }
>>> 
>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>> +			  struct ieee80211_txq *txq)
>>> +{
>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>> +	struct txq_info *txqi = to_txq_info(txq);
>>> +
>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>> +
>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>> +		__ieee80211_unschedule_txq(hw, txq);
>>> +		list_del_init(&txqi->candidate);
>>> +	}
>>> +}
>>> +
>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>  			      struct ieee80211_txq *txq)
>>>  	__acquires(txq_lock) __releases(txq_lock)
>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct 
>>> ieee80211_hw *hw,
>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>> 
>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>> -	__ieee80211_unschedule_txq(hw, txq);
>>> +	ieee80211_remove_txq(hw, txq);
>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>  }
>>> 
>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct ieee80211_hw 
>>> *hw,
>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>> 
>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>> -		__ieee80211_unschedule_txq(hw, txq);
>>> +		!txq_has_queue(&txqi->txq) &&
>>> +		list_empty(&txqi->candidate))
>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>> +
>>>  }
>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>> 
>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>> +{
>>> +	struct txq_info *iter, *tmp;
>>> +	struct sta_info *sta;
>>> +
>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>> +
>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>> +				 candidate) {
>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>> +
>>> +		if (txq_has_queue(&iter->txq))
>>> +			list_del_init(&iter->candidate);
>>> +		else
>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>> +	}
>>> +}
>>> +
>>> +void ieee80211_txqs_check(struct timer_list *t)
>>> +{
>>> +	struct ieee80211_local *local = from_timer(local, t, remove_timer);
>>> +	struct txq_info *iter, *tmp;
>>> +	struct sta_info *sta;
>>> +	int ac;
>>> +
>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>> +		__ieee80211_check_txqs(local, ac);
>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>> +	}
>>> +
>>> +	mod_timer(&local->remove_timer,
>>> +		  jiffies + 
>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>> +}
>> 
>> I'll ask the same as I did last time (where you told me to hold off
>> until this round):
>> 
>> Why do you need the timer and the periodic check? If TXQs are added to
>> the remove list during the scheduling run, and __ieee80211_check_txqs()
>> is run from schedule_end(), isn't that sufficient to clear the list?
> Is it possible that a txq is not added to the remove list but then 
> packets in it are dropped by fq_codel algo? Like the station disconnects 
> without any notification.

Well as long as all the other cleanup paths call directly into
__unschedule_txq(), that should remove stations from the scheduler when
they disconnect etc.

We only need to defer removal inside a single "scheduling round" (i.e.,
between a pair of ieee80211_txq_schedule_start/end calls). So if we just
walk the remove list in schedule_end(), that should be enough, no?

Hmm, or maybe a simpler way to fix the original issue is just to have
unschedule_txq() update the schedule_pos pointer?

I.e., unschedule_txq checks if the txq being removed is currently being
pointed to by schedule_pos[ac], and if it is, it updates schedule_pos to
be the rb_next of the current value?

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-18 11:23         ` Toke Høiland-Jørgensen
@ 2019-09-19  9:56           ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-19  9:56 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>> removal right before the end of this schedule round.
>>>> 
>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> 
>>> I didn't write this patch, so please don't use my sign-off. I'll add
>>> ack or review tags as appropriate in reply; but a few comments first:
>>> 
>>>> ---
>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>  net/mac80211/main.c        |  6 +++++
>>>>  net/mac80211/tx.c          | 63
>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>> 
>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>> index ac2ed8e..ba5a345 100644
>>>> --- a/include/net/mac80211.h
>>>> +++ b/include/net/mac80211.h
>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>> 
>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>> 
>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>> +
>>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate
>>>> *rate,
>>>>  					  u8 mcs, u8 nss)
>>>>  {
>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>> ieee80211_hw *hw,
>>>>   * @ac: AC number to return packets from.
>>>>   *
>>>>   * Should only be called between calls to
>>>> ieee80211_txq_schedule_start()
>>>> - * and ieee80211_txq_schedule_end().
>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will 
>>>> be
>>>> added
>>>> + * to a remove list and get removed later.
>>>>   * Returns the next txq if successful, %NULL if no queue is 
>>>> eligible.
>>>> If a txq
>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>> after the
>>>>   * driver has finished scheduling it.
>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>> ieee80211_hw *hw, u8 ac)
>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>   * @ac: AC number to acquire locks for
>>>>   *
>>>> - * Release locks previously acquired by 
>>>> ieee80211_txq_schedule_end().
>>>> + * Release locks previously acquired by 
>>>> ieee80211_txq_schedule_end().
>>>> Check
>>>> + * and remove the empty txq from rb-tree.
>>>>   */
>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>  	__releases(txq_lock);
>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct 
>>>> ieee80211_hw
>>>> *hw, struct ieee80211_txq *txq)
>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>> 
>>>>  /**
>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>> + *
>>>> + * @tmr: pointer as obtained from local
>>>> + *
>>>> + */
>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>> +
>>>> +/**
>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>> transmit
>>>>   *
>>>>   * This function is used to check whether given txq is allowed to
>>>> transmit by
>>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>>> index a4556f9..49aa143e 100644
>>>> --- a/net/mac80211/ieee80211_i.h
>>>> +++ b/net/mac80211/ieee80211_i.h
>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>  	struct codel_stats cstats;
>>>>  	struct sk_buff_head frags;
>>>>  	struct rb_node schedule_order;
>>>> +	struct list_head candidate;
>>>>  	unsigned long flags;
>>>> 
>>>>  	/* keep last! */
>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>> 
>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>> +	struct timer_list remove_timer;
>>>>  	u16 airtime_flags;
>>>> 
>>>>  	const struct ieee80211_ops *ops;
>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>> index e9ffa8e..78fe24a 100644
>>>> --- a/net/mac80211/main.c
>>>> +++ b/net/mac80211/main.c
>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>> 
>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>  	}
>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>> 
>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>> +	mod_timer(&local->remove_timer,
>>>> +		  jiffies +
>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>> +
>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>  	mutex_init(&local->chanctx_mtx);
>>>> 
>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct 
>>>> ieee80211_hw
>>>> *hw)
>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>  	tasklet_kill(&local->tasklet);
>>>> 
>>>> +	del_timer_sync(&local->remove_timer);
>>>>  #ifdef CONFIG_INET
>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>  #endif
>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>> index d00baaa..42ca010 100644
>>>> --- a/net/mac80211/tx.c
>>>> +++ b/net/mac80211/tx.c
>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>> ieee80211_sub_if_data *sdata,
>>>>  	codel_stats_init(&txqi->cstats);
>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>> 
>>>>  	txqi->txq.vif = &sdata->vif;
>>>> 
>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct 
>>>> ieee80211_hw
>>>> *hw,
>>>> 
>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>> 
>>>> +	if (!list_empty(&txqi->candidate))
>>>> +		list_del_init(&txqi->candidate);
>>>> +
>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>  		goto out;
>>>> 
>>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct
>>>> ieee80211_hw *hw,
>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>  }
>>>> 
>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>> +			  struct ieee80211_txq *txq)
>>>> +{
>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>> +
>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>> +
>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>> +		list_del_init(&txqi->candidate);
>>>> +	}
>>>> +}
>>>> +
>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>  			      struct ieee80211_txq *txq)
>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>> ieee80211_hw *hw,
>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>> 
>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>> +	ieee80211_remove_txq(hw, txq);
>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>  }
>>>> 
>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct 
>>>> ieee80211_hw
>>>> *hw,
>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>> 
>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>> +		!txq_has_queue(&txqi->txq) &&
>>>> +		list_empty(&txqi->candidate))
>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>> +
>>>>  }
>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>> 
>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>>> +{
>>>> +	struct txq_info *iter, *tmp;
>>>> +	struct sta_info *sta;
>>>> +
>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>> +
>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>> +				 candidate) {
>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>> +
>>>> +		if (txq_has_queue(&iter->txq))
>>>> +			list_del_init(&iter->candidate);
>>>> +		else
>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>> +	}
>>>> +}
>>>> +
>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>> +{
>>>> +	struct ieee80211_local *local = from_timer(local, t, 
>>>> remove_timer);
>>>> +	struct txq_info *iter, *tmp;
>>>> +	struct sta_info *sta;
>>>> +	int ac;
>>>> +
>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>> +		__ieee80211_check_txqs(local, ac);
>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>> +	}
>>>> +
>>>> +	mod_timer(&local->remove_timer,
>>>> +		  jiffies +
>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>> +}
>>> 
>>> I'll ask the same as I did last time (where you told me to hold off
>>> until this round):
>>> 
>>> Why do you need the timer and the periodic check? If TXQs are added 
>>> to
>>> the remove list during the scheduling run, and 
>>> __ieee80211_check_txqs()
>>> is run from schedule_end(), isn't that sufficient to clear the list?
>> Is it possible that a txq is not added to the remove list but then
>> packets in it are dropped by fq_codel algo? Like the station 
>> disconnects
>> without any notification.
> 
> Well as long as all the other cleanup paths call directly into
> __unschedule_txq(), that should remove stations from the scheduler when
> they disconnect etc.
Yes, the disconnect scenario is a bad example. My concern is this: say we
have 10 stations and only one of them is assigned a very small weight
compared with the others. After its turn to transmit, it will most
likely be placed at the rightmost position in the rbtree (it still has
some packets in its txq), and no more data arrives for it. The remaining
packets in the txq will be dropped by the timeout logic in codel (correct
me if I am wrong), but the now-empty txq will stay in the rbtree until
the other txqs are drained or the global vt catches up with its vt. That
could take a long time if the weight is extremely small. So do we need a
timer to check for this, or is there a better solution?

> 
> We only need to defer removal inside a single "scheduling round" (i.e.,
> between a pair of ieee80211_txq_schedule_start/end. So if we just walk
> the remove list in schedule_end() we should be enough, no?
> 
> Hmm, or maybe a simpler way to fix the original issue is just to have
> unschedule_txq() update the schedule_pos() pointer?
> 
> I.e., unschedule_txq checks if the txq being removed is currently being
> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos 
> to
> be the rb_next of the current value?
Actually, if schedule_pos is updated to the rb_next of the current value,
then next_txq() will call rb_next again and end up picking the node
after the one we really want. Is it fine to update schedule_pos to NULL
instead?
> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-19  9:56           ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-19  9:56 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>> removal right before the end of this schedule round.
>>>> 
>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>> 
>>> I didn't write this patch, so please don't use my sign-off. I'll add
>>> ack or review tags as appropriate in reply; but a few comments first:
>>> 
>>>> ---
>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>  net/mac80211/main.c        |  6 +++++
>>>>  net/mac80211/tx.c          | 63
>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>> 
>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>> index ac2ed8e..ba5a345 100644
>>>> --- a/include/net/mac80211.h
>>>> +++ b/include/net/mac80211.h
>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>> 
>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>> 
>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>> +
>>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate
>>>> *rate,
>>>>  					  u8 mcs, u8 nss)
>>>>  {
>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>> ieee80211_hw *hw,
>>>>   * @ac: AC number to return packets from.
>>>>   *
>>>>   * Should only be called between calls to
>>>> ieee80211_txq_schedule_start()
>>>> - * and ieee80211_txq_schedule_end().
>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will 
>>>> be
>>>> added
>>>> + * to a remove list and get removed later.
>>>>   * Returns the next txq if successful, %NULL if no queue is 
>>>> eligible.
>>>> If a txq
>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>> after the
>>>>   * driver has finished scheduling it.
>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>> ieee80211_hw *hw, u8 ac)
>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>   * @ac: AC number to acquire locks for
>>>>   *
>>>> - * Release locks previously acquired by 
>>>> ieee80211_txq_schedule_end().
>>>> + * Release locks previously acquired by 
>>>> ieee80211_txq_schedule_end().
>>>> Check
>>>> + * and remove the empty txq from rb-tree.
>>>>   */
>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>  	__releases(txq_lock);
>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct 
>>>> ieee80211_hw
>>>> *hw, struct ieee80211_txq *txq)
>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>> 
>>>>  /**
>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>> + *
>>>> + * @tmr: pointer as obtained from local
>>>> + *
>>>> + */
>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>> +
>>>> +/**
>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>> transmit
>>>>   *
>>>>   * This function is used to check whether given txq is allowed to
>>>> transmit by
>>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>>> index a4556f9..49aa143e 100644
>>>> --- a/net/mac80211/ieee80211_i.h
>>>> +++ b/net/mac80211/ieee80211_i.h
>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>  	struct codel_stats cstats;
>>>>  	struct sk_buff_head frags;
>>>>  	struct rb_node schedule_order;
>>>> +	struct list_head candidate;
>>>>  	unsigned long flags;
>>>> 
>>>>  	/* keep last! */
>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>> 
>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>> +	struct timer_list remove_timer;
>>>>  	u16 airtime_flags;
>>>> 
>>>>  	const struct ieee80211_ops *ops;
>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>> index e9ffa8e..78fe24a 100644
>>>> --- a/net/mac80211/main.c
>>>> +++ b/net/mac80211/main.c
>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>> 
>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>  	}
>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>> 
>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>> +	mod_timer(&local->remove_timer,
>>>> +		  jiffies +
>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>> +
>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>  	mutex_init(&local->chanctx_mtx);
>>>> 
>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct 
>>>> ieee80211_hw
>>>> *hw)
>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>  	tasklet_kill(&local->tasklet);
>>>> 
>>>> +	del_timer_sync(&local->remove_timer);
>>>>  #ifdef CONFIG_INET
>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>  #endif
>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>> index d00baaa..42ca010 100644
>>>> --- a/net/mac80211/tx.c
>>>> +++ b/net/mac80211/tx.c
>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>> ieee80211_sub_if_data *sdata,
>>>>  	codel_stats_init(&txqi->cstats);
>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>> 
>>>>  	txqi->txq.vif = &sdata->vif;
>>>> 
>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct 
>>>> ieee80211_hw
>>>> *hw,
>>>> 
>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>> 
>>>> +	if (!list_empty(&txqi->candidate))
>>>> +		list_del_init(&txqi->candidate);
>>>> +
>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>  		goto out;
>>>> 
>>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct
>>>> ieee80211_hw *hw,
>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>  }
>>>> 
>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>> +			  struct ieee80211_txq *txq)
>>>> +{
>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>> +
>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>> +
>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>> +		list_del_init(&txqi->candidate);
>>>> +	}
>>>> +}
>>>> +
>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>  			      struct ieee80211_txq *txq)
>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>> ieee80211_hw *hw,
>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>> 
>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>> +	ieee80211_remove_txq(hw, txq);
>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>  }
>>>> 
>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct 
>>>> ieee80211_hw
>>>> *hw,
>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>> 
>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>> +		!txq_has_queue(&txqi->txq) &&
>>>> +		list_empty(&txqi->candidate))
>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>> +
>>>>  }
>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>> 
>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>>> +{
>>>> +	struct txq_info *iter, *tmp;
>>>> +	struct sta_info *sta;
>>>> +
>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>> +
>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>> +				 candidate) {
>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>> +
>>>> +		if (txq_has_queue(&iter->txq))
>>>> +			list_del_init(&iter->candidate);
>>>> +		else
>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>> +	}
>>>> +}
>>>> +
>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>> +{
>>>> +	struct ieee80211_local *local = from_timer(local, t, 
>>>> remove_timer);
>>>> +	struct txq_info *iter, *tmp;
>>>> +	struct sta_info *sta;
>>>> +	int ac;
>>>> +
>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>> +		__ieee80211_check_txqs(local, ac);
>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>> +	}
>>>> +
>>>> +	mod_timer(&local->remove_timer,
>>>> +		  jiffies +
>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>> +}
>>> 
>>> I'll ask the same as I did last time (where you told me to hold off
>>> until this round):
>>> 
>>> Why do you need the timer and the periodic check? If TXQs are added 
>>> to
>>> the remove list during the scheduling run, and 
>>> __ieee80211_check_txqs()
>>> is run from schedule_end(), isn't that sufficient to clear the list?
>> Is it possible that a txq is not added to the remove list but then
>> packets in it are dropped by fq_codel algo? Like the station 
>> disconnects
>> without any notification.
> 
> Well as long as all the other cleanup paths call directly into
> __unschedule_txq(), that should remove stations from the scheduler when
> they disconnect etc.
Yes, the disconnect scenario is a bad example. My concern is this: say we
have 10 stations and only one of them is assigned a very small weight
compared with the others. After its chance to transmit, it will most
likely be placed at the rightmost position in the rbtree (still with some
packets in its txq), and no more data arrives for it. The remaining
packets in the txq will be dropped by the timeout algorithm in CoDel
(correct me if I am wrong), but this now-empty txq will stay on the rbtree
until the other txqs get drained or the global vt catches up with its vt.
That could take a long time if the weight is extremely small. So do we
need a timer to check for this, or is there a better solution?

> 
> We only need to defer removal inside a single "scheduling round" (i.e.,
> between a pair of ieee80211_txq_schedule_start/end. So if we just walk
> the remove list in schedule_end() we should be enough, no?
> 
> Hmm, or maybe a simpler way to fix the original issue is just to have
> unschedule_txq() update the schedule_pos() pointer?
> 
> I.e., unschedule_txq checks if the txq being removed is currently being
> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos 
> to
> be the rb_next of the current value?
Actually, if schedule_pos is updated to rb_next of the current value,
then next_txq() will apply rb_next again and end up picking the node
after the one we really want. Would it be fine to set schedule_pos to
NULL instead?
> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-19  9:56           ` Yibo Zhao
@ 2019-09-19 10:37             ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-19 10:37 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>> removal right before the end of this schedule round.
>>>>> 
>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> 
>>>> I didn't write this patch, so please don't use my sign-off. I'll add
>>>> ack or review tags as appropriate in reply; but a few comments first:
>>>> 
>>>>> ---
>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>  net/mac80211/tx.c          | 63
>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>> 
>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>> index ac2ed8e..ba5a345 100644
>>>>> --- a/include/net/mac80211.h
>>>>> +++ b/include/net/mac80211.h
>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>> 
>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>> 
>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>> +
>>>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate
>>>>> *rate,
>>>>>  					  u8 mcs, u8 nss)
>>>>>  {
>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>> ieee80211_hw *hw,
>>>>>   * @ac: AC number to return packets from.
>>>>>   *
>>>>>   * Should only be called between calls to
>>>>> ieee80211_txq_schedule_start()
>>>>> - * and ieee80211_txq_schedule_end().
>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will 
>>>>> be
>>>>> added
>>>>> + * to a remove list and get removed later.
>>>>>   * Returns the next txq if successful, %NULL if no queue is 
>>>>> eligible.
>>>>> If a txq
>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>> after the
>>>>>   * driver has finished scheduling it.
>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>> ieee80211_hw *hw, u8 ac)
>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>   * @ac: AC number to acquire locks for
>>>>>   *
>>>>> - * Release locks previously acquired by 
>>>>> ieee80211_txq_schedule_end().
>>>>> + * Release locks previously acquired by 
>>>>> ieee80211_txq_schedule_end().
>>>>> Check
>>>>> + * and remove the empty txq from rb-tree.
>>>>>   */
>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>  	__releases(txq_lock);
>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw, struct ieee80211_txq *txq)
>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>> 
>>>>>  /**
>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>> + *
>>>>> + * @tmr: pointer as obtained from local
>>>>> + *
>>>>> + */
>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>> +
>>>>> +/**
>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>> transmit
>>>>>   *
>>>>>   * This function is used to check whether given txq is allowed to
>>>>> transmit by
>>>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>>>> index a4556f9..49aa143e 100644
>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>  	struct codel_stats cstats;
>>>>>  	struct sk_buff_head frags;
>>>>>  	struct rb_node schedule_order;
>>>>> +	struct list_head candidate;
>>>>>  	unsigned long flags;
>>>>> 
>>>>>  	/* keep last! */
>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>> 
>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>> +	struct timer_list remove_timer;
>>>>>  	u16 airtime_flags;
>>>>> 
>>>>>  	const struct ieee80211_ops *ops;
>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>> index e9ffa8e..78fe24a 100644
>>>>> --- a/net/mac80211/main.c
>>>>> +++ b/net/mac80211/main.c
>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>> 
>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>  	}
>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>> 
>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>> +	mod_timer(&local->remove_timer,
>>>>> +		  jiffies +
>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>> +
>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>> 
>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct 
>>>>> ieee80211_hw
>>>>> *hw)
>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>  	tasklet_kill(&local->tasklet);
>>>>> 
>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>  #ifdef CONFIG_INET
>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>  #endif
>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>> index d00baaa..42ca010 100644
>>>>> --- a/net/mac80211/tx.c
>>>>> +++ b/net/mac80211/tx.c
>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>> ieee80211_sub_if_data *sdata,
>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>> 
>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>> 
>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw,
>>>>> 
>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>> 
>>>>> +	if (!list_empty(&txqi->candidate))
>>>>> +		list_del_init(&txqi->candidate);
>>>>> +
>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>  		goto out;
>>>>> 
>>>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct
>>>>> ieee80211_hw *hw,
>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>  }
>>>>> 
>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>> +			  struct ieee80211_txq *txq)
>>>>> +{
>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>> +
>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>> +
>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>> +		list_del_init(&txqi->candidate);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>  			      struct ieee80211_txq *txq)
>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>> ieee80211_hw *hw,
>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>> 
>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>  }
>>>>> 
>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw,
>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>> 
>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>> +		list_empty(&txqi->candidate))
>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>> +
>>>>>  }
>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>> 
>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>>>> +{
>>>>> +	struct txq_info *iter, *tmp;
>>>>> +	struct sta_info *sta;
>>>>> +
>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>> +
>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>> +				 candidate) {
>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>> +
>>>>> +		if (txq_has_queue(&iter->txq))
>>>>> +			list_del_init(&iter->candidate);
>>>>> +		else
>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>> +{
>>>>> +	struct ieee80211_local *local = from_timer(local, t, 
>>>>> remove_timer);
>>>>> +	struct txq_info *iter, *tmp;
>>>>> +	struct sta_info *sta;
>>>>> +	int ac;
>>>>> +
>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>> +	}
>>>>> +
>>>>> +	mod_timer(&local->remove_timer,
>>>>> +		  jiffies +
>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>> +}
>>>> 
>>>> I'll ask the same as I did last time (where you told me to hold off
>>>> until this round):
>>>> 
>>>> Why do you need the timer and the periodic check? If TXQs are added 
>>>> to
>>>> the remove list during the scheduling run, and 
>>>> __ieee80211_check_txqs()
>>>> is run from schedule_end(), isn't that sufficient to clear the list?
>>> Is it possible that a txq is not added to the remove list but then
>>> packets in it are dropped by fq_codel algo? Like the station 
>>> disconnects
>>> without any notification.
>> 
>> Well as long as all the other cleanup paths call directly into
>> __unschedule_txq(), that should remove stations from the scheduler when
>> they disconnect etc.
> Yes, the disconnect scenario is a bad example. My concern is, say, we 
> have 10 stations and only one of them is assigned a very small weight 
> compared with that of others. Suppose, after its chance of Tx, it is 
> most likely to be placed in the rightmost(still has some packets in the 
> txq) and no more incoming data for it. The remaining packets in txq will 
> be dropped due to timeout algo in codel(correct me if I am wrong) but 
> this empty txq will stay on the rbtree until other txqs get drained or 
> global vt catch up with its vt. The staying time could be long if weight 
> is extremely small. Then do we need timer to check or any other better 
> solution?

Ah, I see what you mean. No, I don't think this will be a problem; the
scenario you're describing would play out like this:

1. Station ends transmitting, still has a single packet queued, gets
   moved to the end of the rbtree (and stays there for a while).

2. When we finally get to the point where this station gets another
   chance to transmit, the CoDel drop timer triggers and the last packet
   is dropped[0]. This means that the queue will just be empty
   (and ieee80211_tx_dequeue() will return NULL).

3. Because the queue is empty, ieee80211_return_txq() will not put it
   back on the rbtree.

Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
packet dequeue. But even if an empty queue stays on the rbtree for a
while, there is no harm in that: eventually it will get its turn, it
will turn out to be empty, and just be skipped over.

The issue we need to be concerned about is the opposite: If we have a
queue that *does* have packets queued, but which is *not* scheduled for
transmission, that will stall TX.

[0] CoDel in most cases only drops a single packet at a time, so it will
not clear out an entire queue with multiple packets in one go. But you
are right that it could conceivably drop the last packet in a queue.

>> We only need to defer removal inside a single "scheduling round" (i.e.,
>> between a pair of ieee80211_txq_schedule_start/end. So if we just walk
>> the remove list in schedule_end() we should be enough, no?
>> 
>> Hmm, or maybe a simpler way to fix the original issue is just to have
>> unschedule_txq() update the schedule_pos() pointer?
>> 
>> I.e., unschedule_txq checks if the txq being removed is currently being
>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos 
>> to
>> be the rb_next of the current value?
> Actually, if schedule_pos is updated to rb_next of the current value, 
> then in the next_txq() where we are going to use rb_next again and 
> finally pick the next node of the node we really want. Is it fine to 
> update schedule_pos to NULL?

Hmm, yeah, good point.

If we do end up setting schedule_pos to NULL in the middle of a
scheduling round, that will make next_txq() "start over", and do another
loop through the whole thing. I guess we may be able to hit a case where
things can oscillate back and forth between addition and removal
resulting in an infinite loop? Not sure, but at least I can't seem to
convince myself that this can't happen.

But in that case, we could fix it by just conditionally assigning either
rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
something like:

local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-19 10:37             ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-19 10:37 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree gets
>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>> loop will break soon in the ieee80211_next_txq() due to schedule_pos
>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>> removal right before the end of this schedule round.
>>>>> 
>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>> 
>>>> I didn't write this patch, so please don't use my sign-off. I'll add
>>>> ack or review tags as appropriate in reply; but a few comments first:
>>>> 
>>>>> ---
>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>  net/mac80211/tx.c          | 63
>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>> 
>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>> index ac2ed8e..ba5a345 100644
>>>>> --- a/include/net/mac80211.h
>>>>> +++ b/include/net/mac80211.h
>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>> 
>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>> 
>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>> +
>>>>>  static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate
>>>>> *rate,
>>>>>  					  u8 mcs, u8 nss)
>>>>>  {
>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>> ieee80211_hw *hw,
>>>>>   * @ac: AC number to return packets from.
>>>>>   *
>>>>>   * Should only be called between calls to
>>>>> ieee80211_txq_schedule_start()
>>>>> - * and ieee80211_txq_schedule_end().
>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will 
>>>>> be
>>>>> added
>>>>> + * to a remove list and get removed later.
>>>>>   * Returns the next txq if successful, %NULL if no queue is 
>>>>> eligible.
>>>>> If a txq
>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>> after the
>>>>>   * driver has finished scheduling it.
>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>> ieee80211_hw *hw, u8 ac)
>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>   * @ac: AC number to acquire locks for
>>>>>   *
>>>>> - * Release locks previously acquired by 
>>>>> ieee80211_txq_schedule_end().
>>>>> + * Release locks previously acquired by 
>>>>> ieee80211_txq_schedule_end().
>>>>> Check
>>>>> + * and remove the empty txq from rb-tree.
>>>>>   */
>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>  	__releases(txq_lock);
>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw, struct ieee80211_txq *txq)
>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>> 
>>>>>  /**
>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>> + *
>>>>> + * @tmr: pointer as obtained from local
>>>>> + *
>>>>> + */
>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>> +
>>>>> +/**
>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>> transmit
>>>>>   *
>>>>>   * This function is used to check whether given txq is allowed to
>>>>> transmit by
>>>>> diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
>>>>> index a4556f9..49aa143e 100644
>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>  	struct codel_stats cstats;
>>>>>  	struct sk_buff_head frags;
>>>>>  	struct rb_node schedule_order;
>>>>> +	struct list_head candidate;
>>>>>  	unsigned long flags;
>>>>> 
>>>>>  	/* keep last! */
>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>> 
>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>> +	struct timer_list remove_timer;
>>>>>  	u16 airtime_flags;
>>>>> 
>>>>>  	const struct ieee80211_ops *ops;
>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>> index e9ffa8e..78fe24a 100644
>>>>> --- a/net/mac80211/main.c
>>>>> +++ b/net/mac80211/main.c
>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>> 
>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>  	}
>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>> 
>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>> +	mod_timer(&local->remove_timer,
>>>>> +		  jiffies +
>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>> +
>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>> 
>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct 
>>>>> ieee80211_hw
>>>>> *hw)
>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>  	tasklet_kill(&local->tasklet);
>>>>> 
>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>  #ifdef CONFIG_INET
>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>  #endif
>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>> index d00baaa..42ca010 100644
>>>>> --- a/net/mac80211/tx.c
>>>>> +++ b/net/mac80211/tx.c
>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>> ieee80211_sub_if_data *sdata,
>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>> 
>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>> 
>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw,
>>>>> 
>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>> 
>>>>> +	if (!list_empty(&txqi->candidate))
>>>>> +		list_del_init(&txqi->candidate);
>>>>> +
>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>  		goto out;
>>>>> 
>>>>> @@ -3783,6 +3787,20 @@ static void __ieee80211_unschedule_txq(struct
>>>>> ieee80211_hw *hw,
>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>  }
>>>>> 
>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>> +			  struct ieee80211_txq *txq)
>>>>> +{
>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>> +
>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>> +
>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>> +		list_del_init(&txqi->candidate);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>  			      struct ieee80211_txq *txq)
>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>> ieee80211_hw *hw,
>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>> 
>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>  }
>>>>> 
>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct 
>>>>> ieee80211_hw
>>>>> *hw,
>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>> 
>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>> -	    (skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets))
>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>> +		list_empty(&txqi->candidate))
>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>> +
>>>>>  }
>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>> 
>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int ac)
>>>>> +{
>>>>> +	struct txq_info *iter, *tmp;
>>>>> +	struct sta_info *sta;
>>>>> +
>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>> +
>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>> +				 candidate) {
>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>> +
>>>>> +		if (txq_has_queue(&iter->txq))
>>>>> +			list_del_init(&iter->candidate);
>>>>> +		else
>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>> +{
>>>>> +	struct ieee80211_local *local = from_timer(local, t, 
>>>>> remove_timer);
>>>>> +	struct txq_info *iter, *tmp;
>>>>> +	struct sta_info *sta;
>>>>> +	int ac;
>>>>> +
>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>> +	}
>>>>> +
>>>>> +	mod_timer(&local->remove_timer,
>>>>> +		  jiffies +
>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>> +}
>>>> 
>>>> I'll ask the same as I did last time (where you told me to hold off
>>>> until this round):
>>>> 
>>>> Why do you need the timer and the periodic check? If TXQs are added 
>>>> to
>>>> the remove list during the scheduling run, and 
>>>> __ieee80211_check_txqs()
>>>> is run from schedule_end(), isn't that sufficient to clear the list?
>>> Is it possible that a txq is not added to the remove list but then
>>> packets in it are dropped by fq_codel algo? Like the station 
>>> disconnects
>>> without any notification.
>> 
>> Well as long as all the other cleanup paths call directly into
>> __unschedule_txq(), that should remove stations from the scheduler when
>> they disconnect etc.
> Yes, the disconnect scenario is a bad example. My concern is, say, we 
> have 10 stations and only one of them is assigned a very small weight 
> compared with that of others. Suppose, after its chance of Tx, it is 
> most likely to be placed in the rightmost(still has some packets in the 
> txq) and no more incoming data for it. The remaining packets in txq will 
> be dropped due to timeout algo in codel(correct me if I am wrong) but 
> this empty txq will stay on the rbtree until other txqs get drained or 
> global vt catch up with its vt. The staying time could be long if weight 
> is extremely small. Then do we need timer to check or any other better 
> solution?

Ah, I see what you mean. No, I don't think this will be a problem; the
scenario you're describing would play out like this:

1. Station ends transmitting, still has a single packet queued, gets
   moved to the end of the rbtree (and stays there for a while).

2. When we finally get to the point where this station gets another
   chance to transmit, the CoDel drop timer triggers and the last packet
   is dropped[0]. This means that the queue will just be empty
   (and ieee80211_tx_dequeue() will return NULL).

3. Because the queue is empty, ieee80211_return_txq() will not put it
   back on the rbtree.

Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
packet dequeue. But even if an empty queue stays on the rbtree for a
while, there is no harm in that: eventually it will get its turn, it
will turn out to be empty, and just be skipped over.

The issue we need to be concerned about is the opposite: If we have a
queue that *does* have packets queued, but which is *not* scheduled for
transmission, that will stall TX.

[0] CoDel in most cases only drops a single packet at a time, so it will
not clear out an entire queue with multiple packets in one go. But you
are right that it could conceivably drop the last packet in a queue.

>> We only need to defer removal inside a single "scheduling round" (i.e.,
>> between a pair of ieee80211_txq_schedule_start/end. So if we just walk
>> the remove list in schedule_end() we should be enough, no?
>> 
>> Hmm, or maybe a simpler way to fix the original issue is just to have
>> unschedule_txq() update the schedule_pos() pointer?
>> 
>> I.e., unschedule_txq checks if the txq being removed is currently being
>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos 
>> to
>> be the rb_next of the current value?
> Actually, if schedule_pos is updated to rb_next of the current value, 
> then in the next_txq() where we are going to use rb_next again and 
> finally pick the next node of the node we really want. Is it fine to 
> update schedule_pos to NULL?

Hmm, yeah, good point.

If we do end up setting schedule_pos to NULL in the middle of a
scheduling round, that will make next_txq() "start over", and do another
loop through the whole thing. I guess we may be able to hit a case where
things can oscillate back and forth between addition and removal
resulting in an infinite loop? Not sure, but at least I can't seem to
convince myself that this can't happen.

But in that case, we could fix it by just conditionally assigning either
rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
something like:

local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-19 10:37             ` Toke Høiland-Jørgensen
@ 2019-09-20  8:29               ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-20  8:29 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree 
>>>>>> gets
>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>>> loop will break soon in the ieee80211_next_txq() due to 
>>>>>> schedule_pos
>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>> removal right before the end of this schedule round.
>>>>>> 
>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>> 
>>>>> I didn't write this patch, so please don't use my sign-off. I'll 
>>>>> add
>>>>> ack or review tags as appropriate in reply; but a few comments 
>>>>> first:
>>>>> 
>>>>>> ---
>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>  net/mac80211/tx.c          | 63
>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>> 
>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>> index ac2ed8e..ba5a345 100644
>>>>>> --- a/include/net/mac80211.h
>>>>>> +++ b/include/net/mac80211.h
>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>> 
>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>> 
>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>> +
>>>>>>  static inline void ieee80211_rate_set_vht(struct 
>>>>>> ieee80211_tx_rate
>>>>>> *rate,
>>>>>>  					  u8 mcs, u8 nss)
>>>>>>  {
>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>>> ieee80211_hw *hw,
>>>>>>   * @ac: AC number to return packets from.
>>>>>>   *
>>>>>>   * Should only be called between calls to
>>>>>> ieee80211_txq_schedule_start()
>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will
>>>>>> be
>>>>>> added
>>>>>> + * to a remove list and get removed later.
>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>> eligible.
>>>>>> If a txq
>>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>>> after the
>>>>>>   * driver has finished scheduling it.
>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>   * @ac: AC number to acquire locks for
>>>>>>   *
>>>>>> - * Release locks previously acquired by
>>>>>> ieee80211_txq_schedule_end().
>>>>>> + * Release locks previously acquired by
>>>>>> ieee80211_txq_schedule_end().
>>>>>> Check
>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>   */
>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>  	__releases(txq_lock);
>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>> 
>>>>>>  /**
>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>> + *
>>>>>> + * @tmr: pointer as obtained from local
>>>>>> + *
>>>>>> + */
>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>> +
>>>>>> +/**
>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>> transmit
>>>>>>   *
>>>>>>   * This function is used to check whether given txq is allowed to
>>>>>> transmit by
>>>>>> diff --git a/net/mac80211/ieee80211_i.h 
>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>> index a4556f9..49aa143e 100644
>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>  	struct codel_stats cstats;
>>>>>>  	struct sk_buff_head frags;
>>>>>>  	struct rb_node schedule_order;
>>>>>> +	struct list_head candidate;
>>>>>>  	unsigned long flags;
>>>>>> 
>>>>>>  	/* keep last! */
>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>> 
>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>> +	struct timer_list remove_timer;
>>>>>>  	u16 airtime_flags;
>>>>>> 
>>>>>>  	const struct ieee80211_ops *ops;
>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>> index e9ffa8e..78fe24a 100644
>>>>>> --- a/net/mac80211/main.c
>>>>>> +++ b/net/mac80211/main.c
>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>> 
>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>  	}
>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>> 
>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>> +	mod_timer(&local->remove_timer,
>>>>>> +		  jiffies +
>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>> +
>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>> 
>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>> ieee80211_hw
>>>>>> *hw)
>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>> 
>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>  #ifdef CONFIG_INET
>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>  #endif
>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>> index d00baaa..42ca010 100644
>>>>>> --- a/net/mac80211/tx.c
>>>>>> +++ b/net/mac80211/tx.c
>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>> 
>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>> 
>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw,
>>>>>> 
>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>> 
>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>> +		list_del_init(&txqi->candidate);
>>>>>> +
>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>  		goto out;
>>>>>> 
>>>>>> @@ -3783,6 +3787,20 @@ static void 
>>>>>> __ieee80211_unschedule_txq(struct
>>>>>> ieee80211_hw *hw,
>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>  }
>>>>>> 
>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>> +			  struct ieee80211_txq *txq)
>>>>>> +{
>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>> +
>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>> +
>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>> +		list_del_init(&txqi->candidate);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>> ieee80211_hw *hw,
>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>> 
>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>  }
>>>>>> 
>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw,
>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>> 
>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>> -	    (skb_queue_empty(&txqi->frags) && 
>>>>>> !txqi->tin.backlog_packets))
>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>> +		list_empty(&txqi->candidate))
>>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>>> +
>>>>>>  }
>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>> 
>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int 
>>>>>> ac)
>>>>>> +{
>>>>>> +	struct txq_info *iter, *tmp;
>>>>>> +	struct sta_info *sta;
>>>>>> +
>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>> +
>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>> +				 candidate) {
>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>> +
>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>> +			list_del_init(&iter->candidate);
>>>>>> +		else
>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>> +{
>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>> remove_timer);
>>>>>> +	struct txq_info *iter, *tmp;
>>>>>> +	struct sta_info *sta;
>>>>>> +	int ac;
>>>>>> +
>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>> +	}
>>>>>> +
>>>>>> +	mod_timer(&local->remove_timer,
>>>>>> +		  jiffies +
>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>> +}
>>>>> 
>>>>> I'll ask the same as I did last time (where you told me to hold off
>>>>> until this round):
>>>>> 
>>>>> Why do you need the timer and the periodic check? If TXQs are added
>>>>> to
>>>>> the remove list during the scheduling run, and
>>>>> __ieee80211_check_txqs()
>>>>> is run from schedule_end(), isn't that sufficient to clear the 
>>>>> list?
>>>> Is it possible that a txq is not added to the remove list but then
>>>> packets in it are dropped by fq_codel algo? Like the station
>>>> disconnects
>>>> without any notification.
>>> 
>>> Well as long as all the other cleanup paths call directly into
>>> __unschedule_txq(), that should remove stations from the scheduler 
>>> when
>>> they disconnect etc.
>> Yes, the disconnect scenario is a bad example. My concern is, say, we
>> have 10 stations and only one of them is assigned a very small weight
>> compared with that of others. Suppose, after its chance of Tx, it is
>> most likely to be placed in the rightmost(still has some packets in 
>> the
>> txq) and no more incoming data for it. The remaining packets in txq 
>> will
>> be dropped due to timeout algo in codel(correct me if I am wrong) but
>> this empty txq will stay on the rbtree until other txqs get drained or
>> global vt catch up with its vt. The staying time could be long if 
>> weight
>> is extremely small. Then do we need timer to check or any other better
>> solution?
> 
> Ah, I see what you mean. No, I don't think this will be a problem; the
> scenario you're describing would play out like this:
> 
> 1. Station ends transmitting, still has a single packet queued, gets
>    moved to the end of the rbtree (and stays there for a while).
> 
> 2. When we finally get to the point where this station gets another
>    chance to transmit, the CoDel drop timer triggers and the last 
> packet
>    is dropped[0]. This means that the queue will just be empty
>    (and ieee80211_tx_dequeue() will return NULL).
> 
> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>    back on the rbtree.
> 
> Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
> packet dequeue. But even if an empty queue stays on the rbtree for a
> while, there is no harm in that: eventually it will get its turn, it
> will turn out to be empty, and just be skipped over.
Then that will be fine. Thanks for the explanation of the dropping part 
in CoDel algorithm.
> 
> The issue we need to be concerned about is the opposite: If we have a
> queue that *does* have packets queued, but which is *not* scheduled for
> transmission, that will stall TX.
This is by design, since its vt is more than the global vt, right? The 
latency may somehow get impacted though.
> 
> [0] CoDel in most cases only drops a single packet at a time, so it 
> will
> not clear out an entire queue with multiple packets in one go. But you
> are right that it could conceivably drop the last packet in a queue.
> 
>>> We only need to defer removal inside a single "scheduling round" 
>>> (i.e.,
>>> between a pair of ieee80211_txq_schedule_start/end. So if we just 
>>> walk
>>> the remove list in schedule_end() we should be enough, no?
>>> 
>>> Hmm, or maybe a simpler way to fix the original issue is just to have
>>> unschedule_txq() update the schedule_pos() pointer?
>>> 
>>> I.e., unschedule_txq checks if the txq being removed is currently 
>>> being
>>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos
>>> to
>>> be the rb_next of the current value?
>> Actually, if schedule_pos is updated to rb_next of the current value,
>> then in the next_txq() where we are going to use rb_next again and
>> finally pick the next node of the node we really want. Is it fine to
>> update schedule_pos to NULL?
> 
> Hmm, yeah, good point.
> 
> If we do end up setting schedule_pos to NULL in the middle of a
> scheduling round, that will make next_txq() "start over", and do 
> another
> loop through the whole thing. I guess we may be able hit a case where
> things can oscillate back and forth between addition and removal
> resulting in an infinite loop? Not sure, but at least I can't seem to
> convince myself that this can't happen.

As the loop of next_txq is under lock protection, as below,

txq_schedule_start();
while(txq=next_txq()){
...
return_txq(txq);
}
txq_schedule_end();

I do not see any chance of addition, no?
In ath10k, we usually push as many packets of the first txq as we can 
until it is drained, and then move to the next one. So if a txq gets 
removed in return_txq(), it should always be the leftmost. And during 
this period, neither the vt of any station nor the global vt can be 
updated, due to lock protection.

> 
> But in that case, we could fix it by just conditionally assigning 
> either
> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
> something like:
> 
> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
I am not sure I am getting your point. Still in next_txq, 
schedule_pos[ac] will lead us to the next node of the one we want.
> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-20  8:29               ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-20  8:29 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree 
>>>>>> gets
>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>>> loop will break soon in the ieee80211_next_txq() due to 
>>>>>> schedule_pos
>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>> removal right before the end of this schedule round.
>>>>>> 
>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>> 
>>>>> I didn't write this patch, so please don't use my sign-off. I'll 
>>>>> add
>>>>> ack or review tags as appropriate in reply; but a few comments 
>>>>> first:
>>>>> 
>>>>>> ---
>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>  net/mac80211/tx.c          | 63
>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>> 
>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>> index ac2ed8e..ba5a345 100644
>>>>>> --- a/include/net/mac80211.h
>>>>>> +++ b/include/net/mac80211.h
>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>> 
>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>> 
>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>> +
>>>>>>  static inline void ieee80211_rate_set_vht(struct 
>>>>>> ieee80211_tx_rate
>>>>>> *rate,
>>>>>>  					  u8 mcs, u8 nss)
>>>>>>  {
>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>>> ieee80211_hw *hw,
>>>>>>   * @ac: AC number to return packets from.
>>>>>>   *
>>>>>>   * Should only be called between calls to
>>>>>> ieee80211_txq_schedule_start()
>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will
>>>>>> be
>>>>>> added
>>>>>> + * to a remove list and get removed later.
>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>> eligible.
>>>>>> If a txq
>>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>>> after the
>>>>>>   * driver has finished scheduling it.
>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>   * @ac: AC number to acquire locks for
>>>>>>   *
>>>>>> - * Release locks previously acquired by
>>>>>> ieee80211_txq_schedule_end().
>>>>>> + * Release locks previously acquired by
>>>>>> ieee80211_txq_schedule_end().
>>>>>> Check
>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>   */
>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>  	__releases(txq_lock);
>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>> 
>>>>>>  /**
>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>> + *
>>>>>> + * @tmr: pointer as obtained from local
>>>>>> + *
>>>>>> + */
>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>> +
>>>>>> +/**
>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>> transmit
>>>>>>   *
>>>>>>   * This function is used to check whether given txq is allowed to
>>>>>> transmit by
>>>>>> diff --git a/net/mac80211/ieee80211_i.h 
>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>> index a4556f9..49aa143e 100644
>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>  	struct codel_stats cstats;
>>>>>>  	struct sk_buff_head frags;
>>>>>>  	struct rb_node schedule_order;
>>>>>> +	struct list_head candidate;
>>>>>>  	unsigned long flags;
>>>>>> 
>>>>>>  	/* keep last! */
>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>> 
>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>> +	struct timer_list remove_timer;
>>>>>>  	u16 airtime_flags;
>>>>>> 
>>>>>>  	const struct ieee80211_ops *ops;
>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>> index e9ffa8e..78fe24a 100644
>>>>>> --- a/net/mac80211/main.c
>>>>>> +++ b/net/mac80211/main.c
>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>> 
>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>  	}
>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>> 
>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>> +	mod_timer(&local->remove_timer,
>>>>>> +		  jiffies +
>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>> +
>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>> 
>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>> ieee80211_hw
>>>>>> *hw)
>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>> 
>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>  #ifdef CONFIG_INET
>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>  #endif
>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>> index d00baaa..42ca010 100644
>>>>>> --- a/net/mac80211/tx.c
>>>>>> +++ b/net/mac80211/tx.c
>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>> 
>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>> 
>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw,
>>>>>> 
>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>> 
>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>> +		list_del_init(&txqi->candidate);
>>>>>> +
>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>  		goto out;
>>>>>> 
>>>>>> @@ -3783,6 +3787,20 @@ static void 
>>>>>> __ieee80211_unschedule_txq(struct
>>>>>> ieee80211_hw *hw,
>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>  }
>>>>>> 
>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>> +			  struct ieee80211_txq *txq)
>>>>>> +{
>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>> +
>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>> +
>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>> +		list_del_init(&txqi->candidate);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>> ieee80211_hw *hw,
>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>> 
>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>  }
>>>>>> 
>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>> ieee80211_hw
>>>>>> *hw,
>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>> 
>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>> -	    (skb_queue_empty(&txqi->frags) && 
>>>>>> !txqi->tin.backlog_packets))
>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>> +		list_empty(&txqi->candidate))
>>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>>> +
>>>>>>  }
>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>> 
>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int 
>>>>>> ac)
>>>>>> +{
>>>>>> +	struct txq_info *iter, *tmp;
>>>>>> +	struct sta_info *sta;
>>>>>> +
>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>> +
>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>> +				 candidate) {
>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>> +
>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>> +			list_del_init(&iter->candidate);
>>>>>> +		else
>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>> +{
>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>> remove_timer);
>>>>>> +	struct txq_info *iter, *tmp;
>>>>>> +	struct sta_info *sta;
>>>>>> +	int ac;
>>>>>> +
>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>> +	}
>>>>>> +
>>>>>> +	mod_timer(&local->remove_timer,
>>>>>> +		  jiffies +
>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>> +}
>>>>> 
>>>>> I'll ask the same as I did last time (where you told me to hold off
>>>>> until this round):
>>>>> 
>>>>> Why do you need the timer and the periodic check? If TXQs are added
>>>>> to
>>>>> the remove list during the scheduling run, and
>>>>> __ieee80211_check_txqs()
>>>>> is run from schedule_end(), isn't that sufficient to clear the 
>>>>> list?
>>>> Is it possible that a txq is not added to the remove list but then
>>>> packets in it are dropped by fq_codel algo? Like the station
>>>> disconnects
>>>> without any notification.
>>> 
>>> Well as long as all the other cleanup paths call directly into
>>> __unschedule_txq(), that should remove stations from the scheduler 
>>> when
>>> they disconnect etc.
>> Yes, the disconnect scenario is a bad example. My concern is, say, we
>> have 10 stations and only one of them is assigned a very small weight
>> compared with that of others. Suppose, after its chance of Tx, it is
>> most likely to be placed in the rightmost(still has some packets in 
>> the
>> txq) and no more incoming data for it. The remaining packets in txq 
>> will
>> be dropped due to timeout algo in codel(correct me if I am wrong) but
>> this empty txq will stay on the rbtree until other txqs get drained or
>> global vt catch up with its vt. The staying time could be long if 
>> weight
>> is extremely small. Then do we need timer to check or any other better
>> solution?
> 
> Ah, I see what you mean. No, I don't think this will be a problem; the
> scenario you're describing would play out like this:
> 
> 1. Station ends transmitting, still has a single packet queued, gets
>    moved to the end of the rbtree (and stays there for a while).
> 
> 2. When we finally get to the point where this station gets another
>    chance to transmit, the CoDel drop timer triggers and the last 
> packet
>    is dropped[0]. This means that the queue will just be empty
>    (and ieee80211_tx_dequeue() will return NULL).
> 
> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>    back on the rbtree.
> 
> Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
> packet dequeue. But even if an empty queue stays on the rbtree for a
> while, there is no harm in that: eventually it will get its turn, it
> will turn out to be empty, and just be skipped over.
Then that will be fine. Thanks for the explanation of the dropping part 
in CoDel algorithm.
> 
> The issue we need to be concerned about is the opposite: If we have a
> queue that *does* have packets queued, but which is *not* scheduled for
> transmission, that will stall TX.
Is that by design, since its vt is more than the global vt? The latency
may be impacted somewhat, though.
> 
> [0] CoDel in most cases only drops a single packet at a time, so it 
> will
> not clear out an entire queue with multiple packets in one go. But you
> are right that it could conceivably drop the last packet in a queue.
> 
>>> We only need to defer removal inside a single "scheduling round"
>>> (i.e., between a pair of ieee80211_txq_schedule_start/end calls). So if
>>> we just walk the remove list in schedule_end(), that should be enough, no?
>>> 
>>> Hmm, or maybe a simpler way to fix the original issue is just to have
>>> unschedule_txq() update the schedule_pos() pointer?
>>> 
>>> I.e., unschedule_txq checks if the txq being removed is currently 
>>> being
>>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos
>>> to
>>> be the rb_next of the current value?
>> Actually, if schedule_pos is updated to rb_next of the current value,
>> then in the next_txq() where we are going to use rb_next again and
>> finally pick the next node of the node we really want. Is it fine to
>> update schedule_pos to NULL?
> 
> Hmm, yeah, good point.
> 
> If we do end up setting schedule_pos to NULL in the middle of a
> scheduling round, that will make next_txq() "start over", and do 
> another
> loop through the whole thing. I guess we may be able to hit a case where
> things can oscillate back and forth between addition and removal
> resulting in an infinite loop? Not sure, but at least I can't seem to
> convince myself that this can't happen.

Since the next_txq() loop runs under lock protection, as below,

txq_schedule_start();
while(txq=next_txq()){
...
return_txq(txq);
}
txq_schedule_end();

I do not see any chance of addition, no?
In ath10k, we usually push as many packets as we can from the first txq
until it is drained, and then move on to the next one. So if a txq gets
removed in return_txq(), it should always be the leftmost. And during
this period, neither the vt of any station nor the global vt can be
updated, due to lock protection.

> 
> But in that case, we could fix it by just conditionally assigning 
> either
> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
> something like:
> 
> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
I am not sure I get your point. In next_txq(), schedule_pos[ac] will
still lead us to the node after the one we want.
> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
  2019-09-17 21:31   ` Toke Høiland-Jørgensen
@ 2019-09-20  8:37     ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-20  8:37 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: ath10k, linux-wireless

On 2019-09-18 05:31, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>> 
>> This switches the airtime scheduler in mac80211 to use a virtual 
>> time-based
>> scheduler instead of the round-robin scheduler used before. This has a
>> couple of advantages:
>> 
>> - No need to sync up the round-robin scheduler in firmware/hardware 
>> with
>>   the round-robin airtime scheduler.
>> 
>> - If several stations are eligible for transmission we can schedule 
>> both of
>>   them; no need to hard-block the scheduling rotation until the head 
>> of the
>>   queue has used up its quantum.
>> 
>> - The check of whether a station is eligible for transmission becomes
>>   simpler (in ieee80211_txq_may_transmit()).
>> 
>> The drawback is that scheduling becomes slightly more expensive, as we 
>> need
>> to maintain an rbtree of TXQs sorted by virtual time. This means that
>> ieee80211_register_airtime() becomes O(logN) in the number of 
>> currently
>> scheduled TXQs. However, hopefully this number rarely grows too big 
>> (it's
>> only TXQs currently backlogged, not all associated stations), so it
>> shouldn't be too big of an issue.
>> 
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> 
> I'll note that this patch still has the two issues that Felix pointed
> out when I posted the RFC version. Namely:
> 
> - The use of divisions in the fast path. I guess I need to go write 
> some
>   reciprocal-calculation code, since that is also an issue with the AQL
>   patches I linked to before.
> 
> - The fact that we don't count the airtime usage of multicast traffic,
>   which with this series means that the vif TXQ will get priority over
>   the others. I think we agreed to fix this by just adding an airtime
>   v_t to the vif as well and use that for scheduling the TXQ. Does
>   ath10k report airtime usage for multicast as well, or only for
>   stations?
> 
> 
I remember Felix's patches that reduce the time the lock is held in
mac80211 for DRR. Do we need to integrate them into this version?
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler
@ 2019-09-20  8:37     ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-20  8:37 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: linux-wireless, ath10k

On 2019-09-18 05:31, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>> 
>> This switches the airtime scheduler in mac80211 to use a virtual 
>> time-based
>> scheduler instead of the round-robin scheduler used before. This has a
>> couple of advantages:
>> 
>> - No need to sync up the round-robin scheduler in firmware/hardware 
>> with
>>   the round-robin airtime scheduler.
>> 
>> - If several stations are eligible for transmission we can schedule 
>> both of
>>   them; no need to hard-block the scheduling rotation until the head 
>> of the
>>   queue has used up its quantum.
>> 
>> - The check of whether a station is eligible for transmission becomes
>>   simpler (in ieee80211_txq_may_transmit()).
>> 
>> The drawback is that scheduling becomes slightly more expensive, as we 
>> need
>> to maintain an rbtree of TXQs sorted by virtual time. This means that
>> ieee80211_register_airtime() becomes O(logN) in the number of 
>> currently
>> scheduled TXQs. However, hopefully this number rarely grows too big 
>> (it's
>> only TXQs currently backlogged, not all associated stations), so it
>> shouldn't be too big of an issue.
>> 
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> 
> I'll note that this patch still has the two issues that Felix pointed
> out when I posted the RFC version. Namely:
> 
> - The use of divisions in the fast path. I guess I need to go write 
> some
>   reciprocal-calculation code, since that is also an issue with the AQL
>   patches I linked to before.
> 
> - The fact that we don't count the airtime usage of multicast traffic,
>   which with this series means that the vif TXQ will get priority over
>   the others. I think we agreed to fix this by just adding an airtime
>   v_t to the vif as well and use that for scheduling the TXQ. Does
>   ath10k report airtime usage for multicast as well, or only for
>   stations?
> 
> 
I remember Felix's patches that reduce the time the lock is held in
mac80211 for DRR. Do we need to integrate them into this version?
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-20  8:29               ` Yibo Zhao
@ 2019-09-20  9:15                 ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-20  9:15 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree 
>>>>>>> gets
>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>>>> loop will break soon in the ieee80211_next_txq() due to 
>>>>>>> schedule_pos
>>>>>>> not leading to the second txq in the rbtree. Thus, defer the
>>>>>>> removal until right before the end of this schedule round.
>>>>>>> 
>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>> 
>>>>>> I didn't write this patch, so please don't use my sign-off. I'll 
>>>>>> add
>>>>>> ack or review tags as appropriate in reply; but a few comments 
>>>>>> first:
>>>>>> 
>>>>>>> ---
>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>> --- a/include/net/mac80211.h
>>>>>>> +++ b/include/net/mac80211.h
>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>> 
>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>> 
>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>> +
>>>>>>>  static inline void ieee80211_rate_set_vht(struct 
>>>>>>> ieee80211_tx_rate
>>>>>>> *rate,
>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>  {
>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>   *
>>>>>>>   * Should only be called between calls to
>>>>>>> ieee80211_txq_schedule_start()
>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will
>>>>>>> be
>>>>>>> added
>>>>>>> + * to a remove list and get removed later.
>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>> eligible.
>>>>>>> If a txq
>>>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>>>> after the
>>>>>>>   * driver has finished scheduling it.
>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>   *
>>>>>>> - * Release locks previously acquired by
>>>>>>> ieee80211_txq_schedule_end().
>>>>>>> + * Release locks previously acquired by
>>>>>>> ieee80211_txq_schedule_end().
>>>>>>> Check
>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>   */
>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>  	__releases(txq_lock);
>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>> 
>>>>>>>  /**
>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>> + *
>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>> + *
>>>>>>> + */
>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>> +
>>>>>>> +/**
>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>> transmit
>>>>>>>   *
>>>>>>>   * This function is used to check whether given txq is allowed to
>>>>>>> transmit by
>>>>>>> diff --git a/net/mac80211/ieee80211_i.h 
>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>> index a4556f9..49aa143e 100644
>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>  	struct codel_stats cstats;
>>>>>>>  	struct sk_buff_head frags;
>>>>>>>  	struct rb_node schedule_order;
>>>>>>> +	struct list_head candidate;
>>>>>>>  	unsigned long flags;
>>>>>>> 
>>>>>>>  	/* keep last! */
>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>> 
>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>> +	struct timer_list remove_timer;
>>>>>>>  	u16 airtime_flags;
>>>>>>> 
>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>> --- a/net/mac80211/main.c
>>>>>>> +++ b/net/mac80211/main.c
>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>> 
>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>  	}
>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>> 
>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>> +		  jiffies +
>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>> +
>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>> 
>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw)
>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>> 
>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>  #ifdef CONFIG_INET
>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>  #endif
>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>> index d00baaa..42ca010 100644
>>>>>>> --- a/net/mac80211/tx.c
>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>> 
>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>> 
>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw,
>>>>>>> 
>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>> 
>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>> +
>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>  		goto out;
>>>>>>> 
>>>>>>> @@ -3783,6 +3787,20 @@ static void 
>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>  }
>>>>>>> 
>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>> +{
>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>> +
>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>> +
>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>> 
>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>  }
>>>>>>> 
>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw,
>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>> 
>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>> -	    (skb_queue_empty(&txqi->frags) && 
>>>>>>> !txqi->tin.backlog_packets))
>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>>>> +
>>>>>>>  }
>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>> 
>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int 
>>>>>>> ac)
>>>>>>> +{
>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>> +	struct sta_info *sta;
>>>>>>> +
>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>> +
>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>> +				 candidate) {
>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>> +
>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>> +		else
>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>> +{
>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>> remove_timer);
>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>> +	struct sta_info *sta;
>>>>>>> +	int ac;
>>>>>>> +
>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>> +		  jiffies +
>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>> +}
>>>>>> 
>>>>>> I'll ask the same as I did last time (where you told me to hold off
>>>>>> until this round):
>>>>>> 
>>>>>> Why do you need the timer and the periodic check? If TXQs are added
>>>>>> to
>>>>>> the remove list during the scheduling run, and
>>>>>> __ieee80211_check_txqs()
>>>>>> is run from schedule_end(), isn't that sufficient to clear the 
>>>>>> list?
>>>>> Is it possible that a txq is not added to the remove list but then
>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>> disconnects
>>>>> without any notification.
>>>> 
>>>> Well as long as all the other cleanup paths call directly into
>>>> __unschedule_txq(), that should remove stations from the scheduler 
>>>> when
>>>> they disconnect etc.
>>> Yes, the disconnect scenario is a bad example. My concern is, say, we
>>> have 10 stations and only one of them is assigned a very small weight
>>> compared with that of others. Suppose, after its chance of Tx, it is
>>> most likely to be placed in the rightmost(still has some packets in 
>>> the
>>> txq) and no more incoming data for it. The remaining packets in txq 
>>> will
>>> be dropped due to timeout algo in codel(correct me if I am wrong) but
>>> this empty txq will stay on the rbtree until other txqs get drained or
>>> global vt catch up with its vt. The staying time could be long if 
>>> weight
>>> is extremely small. Then do we need timer to check or any other better
>>> solution?
>> 
>> Ah, I see what you mean. No, I don't think this will be a problem; the
>> scenario you're describing would play out like this:
>> 
>> 1. Station ends transmitting, still has a single packet queued, gets
>>    moved to the end of the rbtree (and stays there for a while).
>> 
>> 2. When we finally get to the point where this station gets another
>>    chance to transmit, the CoDel drop timer triggers and the last 
>> packet
>>    is dropped[0]. This means that the queue will just be empty
>>    (and ieee80211_tx_dequeue() will return NULL).
>> 
>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>    back on the rbtree.
>> 
>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
>> packet dequeue. But even if an empty queue stays on the rbtree for a
>> while, there is no harm in that: eventually it will get its turn, it
>> will turn out to be empty, and just be skipped over.
> Then that will be fine. Thanks for the explanation of the dropping part 
> in CoDel algorithm.

Yup, think so. And you're welcome :)

>> The issue we need to be concerned about is the opposite: If we have a
>> queue that *does* have packets queued, but which is *not* scheduled for
>> transmission, that will stall TX.
> Is that by design, since its vt is more than the global vt? The latency
> may be impacted somewhat, though.

Well, it should still stay on the rbtree as long as it has packets
queued. We don't have a check anywhere that reschedules TXQs whose v_t
drops below global v_t...

>> [0] CoDel in most cases only drops a single packet at a time, so it 
>> will
>> not clear out an entire queue with multiple packets in one go. But you
>> are right that it could conceivably drop the last packet in a queue.
>> 
>>>> We only need to defer removal inside a single "scheduling round"
>>>> (i.e., between a pair of ieee80211_txq_schedule_start/end calls). So if
>>>> we just walk the remove list in schedule_end(), that should be enough, no?
>>>> 
>>>> Hmm, or maybe a simpler way to fix the original issue is just to have
>>>> unschedule_txq() update the schedule_pos() pointer?
>>>> 
>>>> I.e., unschedule_txq checks if the txq being removed is currently 
>>>> being
>>>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos
>>>> to
>>>> be the rb_next of the current value?
>>> Actually, if schedule_pos is updated to rb_next of the current value,
>>> then in the next_txq() where we are going to use rb_next again and
>>> finally pick the next node of the node we really want. Is it fine to
>>> update schedule_pos to NULL?
>> 
>> Hmm, yeah, good point.
>> 
>> If we do end up setting schedule_pos to NULL in the middle of a
>> scheduling round, that will make next_txq() "start over", and do 
>> another
>> loop through the whole thing. I guess we may be able to hit a case where
>> things can oscillate back and forth between addition and removal
>> resulting in an infinite loop? Not sure, but at least I can't seem to
>> convince myself that this can't happen.
>
> As the loop of next_txq under lock protection as below,
>
> txq_schedule_start();
> while(txq=next_txq()){
> ...
> return_txq(txq);
> }
> txq_schedule_end();
>
> I do not see any chance of addition, no?

As you noted in your other email, Felix reduced the locking. And yeah,
we need to rebase this series to also incorporate that. I figure I can
send an updated version of the first patch in the series once we've
worked out the remaining issues with your follow-up patches.

> In ath10k, we will usually push packets of first txq as many as we can
> until it is drained and then move to the next one. So if a txq gets
> removed in the return_txq, it should always be the leftmost. And
> during this period, neither vt of any station or global vt can be
> updated due to lock protection.
>
>> 
>> But in that case, we could fix it by just conditionally assigning 
>> either
>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>> something like:
>> 
>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
> I am not sure I am getting your point. Still in next_txq, 
> schedule_pos[ac] will lead us to the next node of the one we want.

The logic in next_txq is different when schedule_pos[ac] is NULL, vs
when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
scheduling round, while the latter ends the current round.

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-20  9:15                 ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-20  9:15 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree 
>>>>>>> gets
>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), the
>>>>>>> loop will break soon in the ieee80211_next_txq() due to 
>>>>>>> schedule_pos
>>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>>> removal right before the end of this schedule round.
>>>>>>> 
>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>> 
>>>>>> I didn't write this patch, so please don't use my sign-off. I'll 
>>>>>> add
>>>>>> ack or review tags as appropriate in reply; but a few comments 
>>>>>> first:
>>>>>> 
>>>>>>> ---
>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>> --- a/include/net/mac80211.h
>>>>>>> +++ b/include/net/mac80211.h
>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>> 
>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>> 
>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>> +
>>>>>>>  static inline void ieee80211_rate_set_vht(struct 
>>>>>>> ieee80211_tx_rate
>>>>>>> *rate,
>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>  {
>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>   *
>>>>>>>   * Should only be called between calls to
>>>>>>> ieee80211_txq_schedule_start()
>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it will
>>>>>>> be
>>>>>>> added
>>>>>>> + * to a remove list and get removed later.
>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>> eligible.
>>>>>>> If a txq
>>>>>>>   * is returned, it should be returned with ieee80211_return_txq()
>>>>>>> after the
>>>>>>>   * driver has finished scheduling it.
>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>   *
>>>>>>> - * Release locks previously acquired by
>>>>>>> ieee80211_txq_schedule_end().
>>>>>>> + * Release locks previously acquired by
>>>>>>> ieee80211_txq_schedule_end().
>>>>>>> Check
>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>   */
>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>  	__releases(txq_lock);
>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>> 
>>>>>>>  /**
>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>> + *
>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>> + *
>>>>>>> + */
>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>> +
>>>>>>> +/**
>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>> transmit
>>>>>>>   *
>>>>>>>   * This function is used to check whether given txq is allowed to
>>>>>>> transmit by
>>>>>>> diff --git a/net/mac80211/ieee80211_i.h 
>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>> index a4556f9..49aa143e 100644
>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>  	struct codel_stats cstats;
>>>>>>>  	struct sk_buff_head frags;
>>>>>>>  	struct rb_node schedule_order;
>>>>>>> +	struct list_head candidate;
>>>>>>>  	unsigned long flags;
>>>>>>> 
>>>>>>>  	/* keep last! */
>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>> 
>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>> +	struct timer_list remove_timer;
>>>>>>>  	u16 airtime_flags;
>>>>>>> 
>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>> --- a/net/mac80211/main.c
>>>>>>> +++ b/net/mac80211/main.c
>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>> 
>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>  	}
>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>> 
>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>> +		  jiffies +
>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>> +
>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>> 
>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw)
>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>> 
>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>  #ifdef CONFIG_INET
>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>  #endif
>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>> index d00baaa..42ca010 100644
>>>>>>> --- a/net/mac80211/tx.c
>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>> 
>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>> 
>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw,
>>>>>>> 
>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>> 
>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>> +
>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>  		goto out;
>>>>>>> 
>>>>>>> @@ -3783,6 +3787,20 @@ static void 
>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>  }
>>>>>>> 
>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>> +{
>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>> +
>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>> +
>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>> ieee80211_hw *hw,
>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>> 
>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>  }
>>>>>>> 
>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>> ieee80211_hw
>>>>>>> *hw,
>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>> 
>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>> -	    (skb_queue_empty(&txqi->frags) && 
>>>>>>> !txqi->tin.backlog_packets))
>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>> +		list_add_tail(&txqi->candidate, &local->remove_list[txq->ac]);
>>>>>>> +
>>>>>>>  }
>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>> 
>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int 
>>>>>>> ac)
>>>>>>> +{
>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>> +	struct sta_info *sta;
>>>>>>> +
>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>> +
>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>> +				 candidate) {
>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>> +
>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>> +		else
>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>> +{
>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>> remove_timer);
>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>> +	struct sta_info *sta;
>>>>>>> +	int ac;
>>>>>>> +
>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>> +		  jiffies +
>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>> +}
>>>>>> 
>>>>>> I'll ask the same as I did last time (where you told me to hold off
>>>>>> until this round):
>>>>>> 
>>>>>> Why do you need the timer and the periodic check? If TXQs are added
>>>>>> to
>>>>>> the remove list during the scheduling run, and
>>>>>> __ieee80211_check_txqs()
>>>>>> is run from schedule_end(), isn't that sufficient to clear the 
>>>>>> list?
>>>>> Is it possible that a txq is not added to the remove list but then
>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>> disconnects
>>>>> without any notification.
>>>> 
>>>> Well as long as all the other cleanup paths call directly into
>>>> __unschedule_txq(), that should remove stations from the scheduler 
>>>> when
>>>> they disconnect etc.
>>> Yes, the disconnect scenario is a bad example. My concern is, say, we
>>> have 10 stations and only one of them is assigned a very small weight
>>> compared with that of others. Suppose, after its chance of Tx, it is
>>> most likely to be placed in the rightmost(still has some packets in 
>>> the
>>> txq) and no more incoming data for it. The remaining packets in txq 
>>> will
>>> be dropped due to timeout algo in codel(correct me if I am wrong) but
>>> this empty txq will stay on the rbtree until other txqs get drained or
>>> global vt catch up with its vt. The staying time could be long if 
>>> weight
>>> is extremely small. Then do we need timer to check or any other better
>>> solution?
>> 
>> Ah, I see what you mean. No, I don't think this will be a problem; the
>> scenario you're describing would play out like this:
>> 
>> 1. Station ends transmitting, still has a single packet queued, gets
>>    moved to the end of the rbtree (and stays there for a while).
>> 
>> 2. When we finally get to the point where this station gets another
>>    chance to transmit, the CoDel drop timer triggers and the last 
>> packet
>>    is dropped[0]. This means that the queue will just be empty
>>    (and ieee80211_tx_dequeue() will return NULL).
>> 
>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>    back on the rbtree.
>> 
>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point of
>> packet dequeue. But even if an empty queue stays on the rbtree for a
>> while, there is no harm in that: eventually it will get its turn, it
>> will turn out to be empty, and just be skipped over.
> Then that will be fine. Thanks for the explanation of the dropping part 
> in CoDel algorithm.

Yup, think so. And you're welcome :)

>> The issue we need to be concerned about is the opposite: If we have a
>> queue that *does* have packets queued, but which is *not* scheduled for
>> transmission, that will stall TX.
> Is it by design since its vt is more than global vt, right? The lattency 
> may somehow get impacted though.

Well, it should still stay on the rbtree as long as it has packets
queued. We don't have a check anywhere that reschedules TXQs whose v_t
drops below global v_t...

>> [0] CoDel in most cases only drops a single packet at a time, so it 
>> will
>> not clear out an entire queue with multiple packets in one go. But you
>> are right that it could conceivably drop the last packet in a queue.
>> 
>>>> We only need to defer removal inside a single "scheduling round" 
>>>> (i.e.,
>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just 
>>>> walk
>>>> the remove list in schedule_end() we should be enough, no?
>>>> 
>>>> Hmm, or maybe a simpler way to fix the original issue is just to have
>>>> unschedule_txq() update the schedule_pos() pointer?
>>>> 
>>>> I.e., unschedule_txq checks if the txq being removed is currently 
>>>> being
>>>> pointed to by schedule_pos[ac], and if it is, it updates schedule_pos
>>>> to
>>>> be the rb_next of the current value?
>>> Actually, if schedule_pos is updated to rb_next of the current value,
>>> then in the next_txq() where we are going to use rb_next again and
>>> finally pick the next node of the node we really want. Is it fine to
>>> update schedule_pos to NULL?
>> 
>> Hmm, yeah, good point.
>> 
>> If we do end up setting schedule_pos to NULL in the middle of a
>> scheduling round, that will make next_txq() "start over", and do 
>> another
>> loop through the whole thing. I guess we may be able hit a case where
>> things can oscillate back and forth between addition and removal
>> resulting in an infinite loop? Not sure, but at least I can't seem to
>> convince myself that this can't happen.
>
> As the loop of next_txq under lock protection as below,
>
> txq_schedule_start();
> while(txq=next_txq()){
> ...
> return_txq(txq);
> }
> txq_schedule_end();
>
> I do not see any chance of addition, no?

As you noted in your other email, Felix reduced the locking. And yeah,
we need to rebase this series to also incorporate that. I figure I can
send an updated version of the first patch in the series once we've
worked out the remaining issues with your follow-up patches.

> In ath10k, we will usually push packets of first txq as many as we can
> until it is drained and then move to the next one. So if a txq gets
> removed in the return_txq, it should always be the leftmost. And
> during this period, neither vt of any station or global vt can be
> updated due to lock protection.
>
>> 
>> But in that case, we could fix it by just conditionally assigning 
>> either
>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>> something like:
>> 
>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
> I am not sure I am getting your point. Still in next_txq, 
> schedule_pos[ac] will lead us to the next node of the one we want.

The logic in next_txq is different when schedule_pos[ac] is NULL, vs
when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
scheduling round, while the latter ends the current round.

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-20  9:15                 ` Toke Høiland-Jørgensen
@ 2019-09-21 10:49                   ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 10:49 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree
>>>>>>>> gets
>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), 
>>>>>>>> the
>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>> schedule_pos
>>>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>>>> removal right before the end of this schedule round.
>>>>>>>> 
>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>> 
>>>>>>> I didn't write this patch, so please don't use my sign-off. I'll
>>>>>>> add
>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>> first:
>>>>>>> 
>>>>>>>> ---
>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>> 
>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>> 
>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>> +
>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>> ieee80211_tx_rate
>>>>>>>> *rate,
>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>  {
>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff 
>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>   *
>>>>>>>>   * Should only be called between calls to
>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it 
>>>>>>>> will
>>>>>>>> be
>>>>>>>> added
>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>> eligible.
>>>>>>>> If a txq
>>>>>>>>   * is returned, it should be returned with 
>>>>>>>> ieee80211_return_txq()
>>>>>>>> after the
>>>>>>>>   * driver has finished scheduling it.
>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>   *
>>>>>>>> - * Release locks previously acquired by
>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>> + * Release locks previously acquired by
>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>> Check
>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>   */
>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>>  	__releases(txq_lock);
>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>> 
>>>>>>>>  /**
>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>> + *
>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>>> transmit
>>>>>>>>   *
>>>>>>>>   * This function is used to check whether given txq is allowed 
>>>>>>>> to
>>>>>>>> transmit by
>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>> +	struct list_head candidate;
>>>>>>>>  	unsigned long flags;
>>>>>>>> 
>>>>>>>>  	/* keep last! */
>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>> 
>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>  	u16 airtime_flags;
>>>>>>>> 
>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>> 
>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>  	}
>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>> 
>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>> +		  jiffies +
>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>> +
>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>> 
>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw)
>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>> 
>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>  #endif
>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>> 
>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>> 
>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw,
>>>>>>>> 
>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>> 
>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>> +
>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>  		goto out;
>>>>>>>> 
>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>  }
>>>>>>>> 
>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>> +{
>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>> +
>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>> +
>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>> +	}
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>> 
>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>  }
>>>>>>>> 
>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw,
>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>> 
>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>> +		list_add_tail(&txqi->candidate, 
>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>> +
>>>>>>>>  }
>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>> 
>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int
>>>>>>>> ac)
>>>>>>>> +{
>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>> +	struct sta_info *sta;
>>>>>>>> +
>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>> +
>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>> +				 candidate) {
>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>> +
>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>> +		else
>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>> +	}
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>> +{
>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>> remove_timer);
>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>> +	struct sta_info *sta;
>>>>>>>> +	int ac;
>>>>>>>> +
>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>> +	}
>>>>>>>> +
>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>> +		  jiffies +
>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>> +}
>>>>>>> 
>>>>>>> I'll ask the same as I did last time (where you told me to hold 
>>>>>>> off
>>>>>>> until this round):
>>>>>>> 
>>>>>>> Why do you need the timer and the periodic check? If TXQs are 
>>>>>>> added
>>>>>>> to
>>>>>>> the remove list during the scheduling run, and
>>>>>>> __ieee80211_check_txqs()
>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>> list?
>>>>>> Is it possible that a txq is not added to the remove list but then
>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>> disconnects
>>>>>> without any notification.
>>>>> 
>>>>> Well as long as all the other cleanup paths call directly into
>>>>> __unschedule_txq(), that should remove stations from the scheduler
>>>>> when
>>>>> they disconnect etc.
>>>> Yes, the disconnect scenario is a bad example. My concern is, say, 
>>>> we
>>>> have 10 stations and only one of them is assigned a very small 
>>>> weight
>>>> compared with that of others. Suppose, after its chance of Tx, it is
>>>> most likely to be placed in the rightmost(still has some packets in
>>>> the
>>>> txq) and no more incoming data for it. The remaining packets in txq
>>>> will
>>>> be dropped due to timeout algo in codel(correct me if I am wrong) 
>>>> but
>>>> this empty txq will stay on the rbtree until other txqs get drained 
>>>> or
>>>> global vt catch up with its vt. The staying time could be long if
>>>> weight
>>>> is extremely small. Then do we need timer to check or any other 
>>>> better
>>>> solution?
>>> 
>>> Ah, I see what you mean. No, I don't think this will be a problem; 
>>> the
>>> scenario you're describing would play out like this:
>>> 
>>> 1. Station ends transmitting, still has a single packet queued, gets
>>>    moved to the end of the rbtree (and stays there for a while).
>>> 
>>> 2. When we finally get to the point where this station gets another
>>>    chance to transmit, the CoDel drop timer triggers and the last
>>> packet
>>>    is dropped[0]. This means that the queue will just be empty
>>>    (and ieee80211_tx_dequeue() will return NULL).
>>> 
>>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>>    back on the rbtree.
>>> 
>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point 
>>> of
>>> packet dequeue. But even if an empty queue stays on the rbtree for a
>>> while, there is no harm in that: eventually it will get its turn, it
>>> will turn out to be empty, and just be skipped over.
>> Then that will be fine. Thanks for the explanation of the dropping 
>> part
>> in CoDel algorithm.
> 
> Yup, think so. And you're welcome :)
> 
>>> The issue we need to be concerned about is the opposite: If we have a
>>> queue that *does* have packets queued, but which is *not* scheduled 
>>> for
>>> transmission, that will stall TX.
>> Is it by design since its vt is more than global vt, right? The 
>> lattency
>> may somehow get impacted though.
> 
> Well, it should still stay on the rbtree as long as it has packets
> queued. We don't have a check anywhere that reschedules TXQs whose v_t
> drops below global v_t...
> 
>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>> will
>>> not clear out an entire queue with multiple packets in one go. But 
>>> you
>>> are right that it could conceivably drop the last packet in a queue.
>>> 
>>>>> We only need to defer removal inside a single "scheduling round"
>>>>> (i.e.,
>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>> walk
>>>>> the remove list in schedule_end() we should be enough, no?
>>>>> 
>>>>> Hmm, or maybe a simpler way to fix the original issue is just to 
>>>>> have
>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>> 
>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>> being
>>>>> pointed to by schedule_pos[ac], and if it is, it updates 
>>>>> schedule_pos
>>>>> to
>>>>> be the rb_next of the current value?
>>>> Actually, if schedule_pos is updated to rb_next of the current 
>>>> value,
>>>> then in the next_txq() where we are going to use rb_next again and
>>>> finally pick the next node of the node we really want. Is it fine to
>>>> update schedule_pos to NULL?
>>> 
>>> Hmm, yeah, good point.
>>> 
>>> If we do end up setting schedule_pos to NULL in the middle of a
>>> scheduling round, that will make next_txq() "start over", and do
>>> another
>>> loop through the whole thing. I guess we may be able hit a case where
>>> things can oscillate back and forth between addition and removal
>>> resulting in an infinite loop? Not sure, but at least I can't seem to
>>> convince myself that this can't happen.
>> 
>> As the loop of next_txq under lock protection as below,
>> 
>> txq_schedule_start();
>> while(txq=next_txq()){
>> ...
>> return_txq(txq);
>> }
>> txq_schedule_end();
>> 
>> I do not see any chance of addition, no?
> 
> As you noted in your other email, Felix reduced the locking. And yeah,
> we need to rebase this series to also incorporate that. I figure I can
> send an updated version of the first patch in the series once we've
> worked out the remaining issues with your follow-up patches.
> 
Oh, I thought we were discussing the case without the reduced locking. 
Yes, I agree there might be a case that causes an infinite loop. With 
the locking reduced, the tree can be adjusted between next_txq() and 
return_txq() inside the loop. For further discussion, let's consider:
1) the tree starts like:
        A->B->C->D->E
2) then next_txq() returns A for dequeuing
3) driver dequeues from A and drains A without any active txq lock held, 
meaning the tree could be changed upon Tx completion.
4) then in return_txq(), the tree could be,
        i   A->B->C->D->E (A is empty, and may soon be added back 
before the loop ends)
        ii  B->C->A->D->E (A is empty, and may soon be added back 
before the loop ends)
        iii B->C->D->E->A (A is empty, and may soon be added back 
before the loop ends)

with this change:
  local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);

For case i, local->schedule_pos[ac] is rb_next(A), which is B, and in 
next_txq() what we return is rb_next(B), which is actually C, so B is 
skipped, no?

Similarly, for case ii, we skip B, C and D.

Also, I am wondering whether there will be some SMP issues related to 
local->schedule_pos[ac].

>> In ath10k, we will usually push packets of first txq as many as we can
>> until it is drained and then move to the next one. So if a txq gets
>> removed in the return_txq, it should always be the leftmost. And
>> during this period, neither vt of any station or global vt can be
>> updated due to lock protection.
>> 
>>> 
>>> But in that case, we could fix it by just conditionally assigning
>>> either
>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>> something like:
>>> 
>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> I am not sure I am getting your point. Still in next_txq,
>> schedule_pos[ac] will lead us to the next node of the one we want.
> 
> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
> scheduling round, while the latter ends the current round.
> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 10:49                   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 10:49 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree
>>>>>>>> gets
>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), 
>>>>>>>> the
>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>> schedule_pos
>>>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>>>> removal right before the end of this schedule round.
>>>>>>>> 
>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>> 
>>>>>>> I didn't write this patch, so please don't use my sign-off. I'll
>>>>>>> add
>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>> first:
>>>>>>> 
>>>>>>>> ---
>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>> 
>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>> 
>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>> +
>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>> ieee80211_tx_rate
>>>>>>>> *rate,
>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>  {
>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff 
>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>   *
>>>>>>>>   * Should only be called between calls to
>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it 
>>>>>>>> will
>>>>>>>> be
>>>>>>>> added
>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>> eligible.
>>>>>>>> If a txq
>>>>>>>>   * is returned, it should be returned with 
>>>>>>>> ieee80211_return_txq()
>>>>>>>> after the
>>>>>>>>   * driver has finished scheduling it.
>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>   *
>>>>>>>> - * Release locks previously acquired by
>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>> + * Release locks previously acquired by
>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>> Check
>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>   */
>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>>  	__releases(txq_lock);
>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>> 
>>>>>>>>  /**
>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>> + *
>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>> + *
>>>>>>>> + */
>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>> +
>>>>>>>> +/**
>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>>> transmit
>>>>>>>>   *
>>>>>>>>   * This function is used to check whether given txq is allowed 
>>>>>>>> to
>>>>>>>> transmit by
>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>> +	struct list_head candidate;
>>>>>>>>  	unsigned long flags;
>>>>>>>> 
>>>>>>>>  	/* keep last! */
>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>> 
>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>  	u16 airtime_flags;
>>>>>>>> 
>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>> 
>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>  	}
>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>> 
>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>> +		  jiffies +
>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>> +
>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>> 
>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw)
>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>> 
>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>  #endif
>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>> 
>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>> 
>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw,
>>>>>>>> 
>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>> 
>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>> +
>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>  		goto out;
>>>>>>>> 
>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>  }
>>>>>>>> 
>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>> +{
>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>> +
>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>> +
>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>> +	}
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>> ieee80211_hw *hw,
>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>> 
>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>  }
>>>>>>>> 
>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>> ieee80211_hw
>>>>>>>> *hw,
>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>> 
>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>> +		list_add_tail(&txqi->candidate, 
>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>> +
>>>>>>>>  }
>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>> 
>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int
>>>>>>>> ac)
>>>>>>>> +{
>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>> +	struct sta_info *sta;
>>>>>>>> +
>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>> +
>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>> +				 candidate) {
>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>> +
>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>> +		else
>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>> +	}
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>> +{
>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>> remove_timer);
>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>> +	struct sta_info *sta;
>>>>>>>> +	int ac;
>>>>>>>> +
>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>> +	}
>>>>>>>> +
>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>> +		  jiffies +
>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>> +}
>>>>>>> 
>>>>>>> I'll ask the same as I did last time (where you told me to hold 
>>>>>>> off
>>>>>>> until this round):
>>>>>>> 
>>>>>>> Why do you need the timer and the periodic check? If TXQs are 
>>>>>>> added
>>>>>>> to
>>>>>>> the remove list during the scheduling run, and
>>>>>>> __ieee80211_check_txqs()
>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>> list?
>>>>>> Is it possible that a txq is not added to the remove list but then
>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>> disconnects
>>>>>> without any notification.
>>>>> 
>>>>> Well as long as all the other cleanup paths call directly into
>>>>> __unschedule_txq(), that should remove stations from the scheduler
>>>>> when
>>>>> they disconnect etc.
>>>> Yes, the disconnect scenario is a bad example. My concern is, say, 
>>>> we
>>>> have 10 stations and only one of them is assigned a very small 
>>>> weight
>>>> compared with that of others. Suppose, after its chance of Tx, it is
>>>> most likely to be placed in the rightmost(still has some packets in
>>>> the
>>>> txq) and no more incoming data for it. The remaining packets in txq
>>>> will
>>>> be dropped due to timeout algo in codel(correct me if I am wrong) 
>>>> but
>>>> this empty txq will stay on the rbtree until other txqs get drained 
>>>> or
>>>> global vt catch up with its vt. The staying time could be long if
>>>> weight
>>>> is extremely small. Then do we need timer to check or any other 
>>>> better
>>>> solution?
>>> 
>>> Ah, I see what you mean. No, I don't think this will be a problem; 
>>> the
>>> scenario you're describing would play out like this:
>>> 
>>> 1. Station ends transmitting, still has a single packet queued, gets
>>>    moved to the end of the rbtree (and stays there for a while).
>>> 
>>> 2. When we finally get to the point where this station gets another
>>>    chance to transmit, the CoDel drop timer triggers and the last
>>> packet
>>>    is dropped[0]. This means that the queue will just be empty
>>>    (and ieee80211_tx_dequeue() will return NULL).
>>> 
>>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>>    back on the rbtree.
>>> 
>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point 
>>> of
>>> packet dequeue. But even if an empty queue stays on the rbtree for a
>>> while, there is no harm in that: eventually it will get its turn, it
>>> will turn out to be empty, and just be skipped over.
>> Then that will be fine. Thanks for the explanation of the dropping 
>> part
>> in CoDel algorithm.
> 
> Yup, think so. And you're welcome :)
> 
>>> The issue we need to be concerned about is the opposite: If we have a
>>> queue that *does* have packets queued, but which is *not* scheduled 
>>> for
>>> transmission, that will stall TX.
>> Is it by design since its vt is more than global vt, right? The 
>> lattency
>> may somehow get impacted though.
> 
> Well, it should still stay on the rbtree as long as it has packets
> queued. We don't have a check anywhere that reschedules TXQs whose v_t
> drops below global v_t...
> 
>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>> will
>>> not clear out an entire queue with multiple packets in one go. But 
>>> you
>>> are right that it could conceivably drop the last packet in a queue.
>>> 
>>>>> We only need to defer removal inside a single "scheduling round"
>>>>> (i.e.,
>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>> walk
>>>>> the remove list in schedule_end() we should be enough, no?
>>>>> 
>>>>> Hmm, or maybe a simpler way to fix the original issue is just to 
>>>>> have
>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>> 
>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>> being
>>>>> pointed to by schedule_pos[ac], and if it is, it updates 
>>>>> schedule_pos
>>>>> to
>>>>> be the rb_next of the current value?
>>>> Actually, if schedule_pos is updated to rb_next of the current 
>>>> value,
>>>> then in the next_txq() where we are going to use rb_next again and
>>>> finally pick the next node of the node we really want. Is it fine to
>>>> update schedule_pos to NULL?
>>> 
>>> Hmm, yeah, good point.
>>> 
>>> If we do end up setting schedule_pos to NULL in the middle of a
>>> scheduling round, that will make next_txq() "start over", and do
>>> another
>>> loop through the whole thing. I guess we may be able hit a case where
>>> things can oscillate back and forth between addition and removal
>>> resulting in an infinite loop? Not sure, but at least I can't seem to
>>> convince myself that this can't happen.
>> 
>> As the loop of next_txq under lock protection as below,
>> 
>> txq_schedule_start();
>> while(txq=next_txq()){
>> ...
>> return_txq(txq);
>> }
>> txq_schedule_end();
>> 
>> I do not see any chance of addition, no?
> 
> As you noted in your other email, Felix reduced the locking. And yeah,
> we need to rebase this series to also incorporate that. I figure I can
> send an updated version of the first patch in the series once we've
> worked out the remaining issues with your follow-up patches.
> 
Oh, I was thinking we were discussing the case without the locking reduced. Yes, I 
also agree there might be a case causing an infinite loop. With locking 
reduced, the tree can be adjusted between next_txq() and return_txq() in 
the loop situation. For further discussion, let's consider:
1) the tree starts like:
        A->B->C->D->E
2) then next_txq() returns A for dequeuing
3) driver dequeues A and drains A without any active txq lock held, meaning 
the tree could be changed upon Tx completion.
4) then in return_txq(), the tree could be,
        i   A->B->C->D->E (A is empty, and may soon be added back 
before the loop ends)
        ii  B->C->A->D->E (A is empty, and may soon be added back 
before the loop ends)
        iii B->C->D->E->A (A is empty, and may soon be added back 
before the loop ends)

with this change:
  local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);

for case i, local->schedule_pos[ac] is rb_next(A), which is B, and in 
next_txq(), rb_next(B) is what we return, which is actually C, so B is 
skipped, no?

Similarly for case ii, we skip B, C, D.

Also I am wondering if there will be some SMP issues related to 
local->schedule_pos[ac].

>> In ath10k, we will usually push packets of first txq as many as we can
>> until it is drained and then move to the next one. So if a txq gets
>> removed in the return_txq, it should always be the leftmost. And
>> during this period, neither vt of any station or global vt can be
>> updated due to lock protection.
>> 
>>> 
>>> But in that case, we could fix it by just conditionally assigning
>>> either
>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>> something like:
>>> 
>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> I am not sure I am getting your point. Still in next_txq,
>> schedule_pos[ac] will lead us to the next node of the one we want.
> 
> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
> scheduling round, while the latter ends the current round.
> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 10:49                   ` Yibo Zhao
@ 2019-09-21 11:27                     ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 11:27 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree
>>>>>>>>> gets
>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), 
>>>>>>>>> the
>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>> schedule_pos
>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>> 
>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>> 
>>>>>>>> I didn't write this patch, so please don't use my sign-off. I'll
>>>>>>>> add
>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>> first:
>>>>>>>> 
>>>>>>>>> ---
>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>> 
>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>> 
>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>> +
>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>> ieee80211_tx_rate
>>>>>>>>> *rate,
>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>  {
>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff 
>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>   *
>>>>>>>>>   * Should only be called between calls to
>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it 
>>>>>>>>> will
>>>>>>>>> be
>>>>>>>>> added
>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>> eligible.
>>>>>>>>> If a txq
>>>>>>>>>   * is returned, it should be returned with 
>>>>>>>>> ieee80211_return_txq()
>>>>>>>>> after the
>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>   *
>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>> Check
>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>   */
>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>> 
>>>>>>>>>  /**
>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>> + *
>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>> + *
>>>>>>>>> + */
>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>>>> transmit
>>>>>>>>>   *
>>>>>>>>>   * This function is used to check whether given txq is allowed 
>>>>>>>>> to
>>>>>>>>> transmit by
>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>  	unsigned long flags;
>>>>>>>>> 
>>>>>>>>>  	/* keep last! */
>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>> 
>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>> 
>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>> 
>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>  	}
>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>> 
>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>> +		  jiffies +
>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>> +
>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>> 
>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw)
>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>> 
>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>  #endif
>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>> 
>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>> 
>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw,
>>>>>>>>> 
>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> 
>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>> +
>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>  		goto out;
>>>>>>>>> 
>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>  }
>>>>>>>>> 
>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>> +{
>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>> +
>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>> +
>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>> +	}
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>> 
>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>  }
>>>>>>>>> 
>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw,
>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>> 
>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>> +		list_add_tail(&txqi->candidate, 
>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>> +
>>>>>>>>>  }
>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>> 
>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int
>>>>>>>>> ac)
>>>>>>>>> +{
>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>> +
>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>> +
>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>> +				 candidate) {
>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>> +
>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>> +		else
>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>> +	}
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>> +{
>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>> remove_timer);
>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>> +	int ac;
>>>>>>>>> +
>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>> +		  jiffies +
>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>> +}
>>>>>>>> 
>>>>>>>> I'll ask the same as I did last time (where you told me to hold 
>>>>>>>> off
>>>>>>>> until this round):
>>>>>>>> 
>>>>>>>> Why do you need the timer and the periodic check? If TXQs are 
>>>>>>>> added
>>>>>>>> to
>>>>>>>> the remove list during the scheduling run, and
>>>>>>>> __ieee80211_check_txqs()
>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>> list?
>>>>>>> Is it possible that a txq is not added to the remove list but then
>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>> disconnects
>>>>>>> without any notification.
>>>>>> 
>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>> __unschedule_txq(), that should remove stations from the scheduler
>>>>>> when
>>>>>> they disconnect etc.
>>>>> Yes, the disconnect scenario is a bad example. My concern is, say, 
>>>>> we
>>>>> have 10 stations and only one of them is assigned a very small 
>>>>> weight
>>>>> compared with that of others. Suppose, after its chance of Tx, it is
>>>>> most likely to be placed in the rightmost(still has some packets in
>>>>> the
>>>>> txq) and no more incoming data for it. The remaining packets in txq
>>>>> will
>>>>> be dropped due to timeout algo in codel(correct me if I am wrong) 
>>>>> but
>>>>> this empty txq will stay on the rbtree until other txqs get drained 
>>>>> or
>>>>> global vt catch up with its vt. The staying time could be long if
>>>>> weight
>>>>> is extremely small. Then do we need timer to check or any other 
>>>>> better
>>>>> solution?
>>>> 
>>>> Ah, I see what you mean. No, I don't think this will be a problem; 
>>>> the
>>>> scenario you're describing would play out like this:
>>>> 
>>>> 1. Station ends transmitting, still has a single packet queued, gets
>>>>    moved to the end of the rbtree (and stays there for a while).
>>>> 
>>>> 2. When we finally get to the point where this station gets another
>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>> packet
>>>>    is dropped[0]. This means that the queue will just be empty
>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>> 
>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>>>    back on the rbtree.
>>>> 
>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point 
>>>> of
>>>> packet dequeue. But even if an empty queue stays on the rbtree for a
>>>> while, there is no harm in that: eventually it will get its turn, it
>>>> will turn out to be empty, and just be skipped over.
>>> Then that will be fine. Thanks for the explanation of the dropping 
>>> part
>>> in CoDel algorithm.
>> 
>> Yup, think so. And you're welcome :)
>> 
>>>> The issue we need to be concerned about is the opposite: If we have a
>>>> queue that *does* have packets queued, but which is *not* scheduled 
>>>> for
>>>> transmission, that will stall TX.
>>> Is it by design since its vt is more than global vt, right? The 
>>> lattency
>>> may somehow get impacted though.
>> 
>> Well, it should still stay on the rbtree as long as it has packets
>> queued. We don't have a check anywhere that reschedules TXQs whose v_t
>> drops below global v_t...
>> 
>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>> will
>>>> not clear out an entire queue with multiple packets in one go. But 
>>>> you
>>>> are right that it could conceivably drop the last packet in a queue.
>>>> 
>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>> (i.e.,
>>>>>> between a pair of ieee80211_txq_schedule_start/end). So if we just
>>>>>> walk
>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>> 
>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to 
>>>>>> have
>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>> 
>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>> being
>>>>>> pointed to by schedule_pos[ac], and if it is, it updates 
>>>>>> schedule_pos
>>>>>> to
>>>>>> be the rb_next of the current value?
>>>>> Actually, if schedule_pos is updated to rb_next of the current 
>>>>> value,
>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>> finally pick the next node of the node we really want. Is it fine to
>>>>> update schedule_pos to NULL?
>>>> 
>>>> Hmm, yeah, good point.
>>>> 
>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>> scheduling round, that will make next_txq() "start over", and do
>>>> another
>>>> loop through the whole thing. I guess we may be able to hit a case where
>>>> things can oscillate back and forth between addition and removal
>>>> resulting in an infinite loop? Not sure, but at least I can't seem to
>>>> convince myself that this can't happen.
>>> 
>>> As the loop of next_txq under lock protection as below,
>>> 
>>> txq_schedule_start();
>>> while(txq=next_txq()){
>>> ...
>>> return_txq(txq);
>>> }
>>> txq_schedule_end();
>>> 
>>> I do not see any chance of addition, no?
>> 
>> As you noted in your other email, Felix reduced the locking. And yeah,
>> we need to rebase this series to also incorporate that. I figure I can
>> send an updated version of the first patch in the series once we've
>> worked out the remaining issues with your follow-up patches.
>> 
> Oh, I was thinking we were discussing without locking reduced. Yes, I 
> also agree there might be a case causing infinite loop. With locking 
> reduced, the tree can be adjusted between next_txq() and return_txq() in 
> the loop situation. For further discussion, let's consider,
> 1) the tree starts like:
>         A->B->C->D->E
> 2) then next_txq() returns A for dequeuing
> 3) driver dequeues A and drains A without any active txq lock held, meaning
> the tree could be changed upon Tx completion.
> 4) then in return_txq(), the tree could be,
>         i   A->B->C->D->E (A is empty, and maybe soon be added back 
> before the loop end)
>         ii  B->C->A->D->E (A is empty, and maybe soon be added back 
> before the loop end)
>         iii B->C->D->E->A (A is empty, and maybe soon be added back 
> before the loop end)
>
> with this change:
>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>
> For case i, local->schedule_pos[ac] is rb_next(A), which is B, and in
> next_txq(), rb_next(B) is what we return, which is actually C, so B is
> skipped, no?
>
> Similarly for case ii, we skip B, C, D.

Yup, I think you're right. But we can fix this by making
ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
resort_txq() acts on the txq that's currently in schedule_pos, it will
update schedule_pos with the same rb_next(node) ?: rb_prev(node);
(optionally after checking that the position of the node is actually
going to change).

> Also I am wondering if there will be some SMP issues relating with 
> local->schedule_pos[ac].

Not sure what you mean by this?

>>> In ath10k, we will usually push packets of first txq as many as we can
>>> until it is drained and then move to the next one. So if a txq gets
>>> removed in the return_txq, it should always be the leftmost. And
>>> during this period, neither vt of any station or global vt can be
>>> updated due to lock protection.
>>> 
>>>> 
>>>> But in that case, we could fix it by just conditionally assigning
>>>> either
>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>> something like:
>>>> 
>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>> I am not sure I am getting your point. Still in next_txq,
>>> schedule_pos[ac] will lead us to the next node of the one we want.
>> 
>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>> scheduling round, while the latter ends the current round.
>> 
>> -Toke
>
> -- 
> Yibo


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 11:27                     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 11:27 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the rbtree
>>>>>>>>> gets
>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(), 
>>>>>>>>> the
>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>> schedule_pos
>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering the
>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>> 
>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>> 
>>>>>>>> I didn't write this patch, so please don't use my sign-off. I'll
>>>>>>>> add
>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>> first:
>>>>>>>> 
>>>>>>>>> ---
>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>> 
>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>> 
>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>> +
>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>> ieee80211_tx_rate
>>>>>>>>> *rate,
>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>  {
>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff 
>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>   *
>>>>>>>>>   * Should only be called between calls to
>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it 
>>>>>>>>> will
>>>>>>>>> be
>>>>>>>>> added
>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>> eligible.
>>>>>>>>> If a txq
>>>>>>>>>   * is returned, it should be returned with 
>>>>>>>>> ieee80211_return_txq()
>>>>>>>>> after the
>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>   *
>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>> Check
>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>   */
>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>> 
>>>>>>>>>  /**
>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>> + *
>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>> + *
>>>>>>>>> + */
>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed to
>>>>>>>>> transmit
>>>>>>>>>   *
>>>>>>>>>   * This function is used to check whether given txq is allowed 
>>>>>>>>> to
>>>>>>>>> transmit by
>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>  	unsigned long flags;
>>>>>>>>> 
>>>>>>>>>  	/* keep last! */
>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>> 
>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>> 
>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>> 
>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>  	}
>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>> 
>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>> +		  jiffies +
>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>> +
>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>> 
>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw)
>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>> 
>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>  #endif
>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>> 
>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>> 
>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw,
>>>>>>>>> 
>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> 
>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>> +
>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>  		goto out;
>>>>>>>>> 
>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>  }
>>>>>>>>> 
>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>> +{
>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>> +
>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>> +
>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>> +	}
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>> 
>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>  }
>>>>>>>>> 
>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>> ieee80211_hw
>>>>>>>>> *hw,
>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>> 
>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>> +		list_add_tail(&txqi->candidate, 
>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>> +
>>>>>>>>>  }
>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>> 
>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, int
>>>>>>>>> ac)
>>>>>>>>> +{
>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>> +
>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>> +
>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>> +				 candidate) {
>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>> +
>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>> +		else
>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>> +	}
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>> +{
>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>> remove_timer);
>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>> +	int ac;
>>>>>>>>> +
>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>> +		  jiffies +
>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>> +}
>>>>>>>> 
>>>>>>>> I'll ask the same as I did last time (where you told me to hold 
>>>>>>>> off
>>>>>>>> until this round):
>>>>>>>> 
>>>>>>>> Why do you need the timer and the periodic check? If TXQs are 
>>>>>>>> added
>>>>>>>> to
>>>>>>>> the remove list during the scheduling run, and
>>>>>>>> __ieee80211_check_txqs()
>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>> list?
>>>>>>> Is it possible that a txq is not added to the remove list but then
>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>> disconnects
>>>>>>> without any notification.
>>>>>> 
>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>> __unschedule_txq(), that should remove stations from the scheduler
>>>>>> when
>>>>>> they disconnect etc.
>>>>> Yes, the disconnect scenario is a bad example. My concern is, say, 
>>>>> we
>>>>> have 10 stations and only one of them is assigned a very small 
>>>>> weight
>>>>> compared with that of others. Suppose, after its chance of Tx, it is
>>>>> most likely to be placed in the rightmost(still has some packets in
>>>>> the
>>>>> txq) and no more incoming data for it. The remaining packets in txq
>>>>> will
>>>>> be dropped due to timeout algo in codel(correct me if I am wrong) 
>>>>> but
>>>>> this empty txq will stay on the rbtree until other txqs get drained 
>>>>> or
>>>>> global vt catch up with its vt. The staying time could be long if
>>>>> weight
>>>>> is extremely small. Then do we need timer to check or any other 
>>>>> better
>>>>> solution?
>>>> 
>>>> Ah, I see what you mean. No, I don't think this will be a problem; 
>>>> the
>>>> scenario you're describing would play out like this:
>>>> 
>>>> 1. Station ends transmitting, still has a single packet queued, gets
>>>>    moved to the end of the rbtree (and stays there for a while).
>>>> 
>>>> 2. When we finally get to the point where this station gets another
>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>> packet
>>>>    is dropped[0]. This means that the queue will just be empty
>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>> 
>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put it
>>>>    back on the rbtree.
>>>> 
>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the point 
>>>> of
>>>> packet dequeue. But even if an empty queue stays on the rbtree for a
>>>> while, there is no harm in that: eventually it will get its turn, it
>>>> will turn out to be empty, and just be skipped over.
>>> Then that will be fine. Thanks for the explanation of the dropping 
>>> part
>>> in CoDel algorithm.
>> 
>> Yup, think so. And you're welcome :)
>> 
>>>> The issue we need to be concerned about is the opposite: If we have a
>>>> queue that *does* have packets queued, but which is *not* scheduled 
>>>> for
>>>> transmission, that will stall TX.
>>> Is it by design since its vt is more than global vt, right? The
>>> latency
>>> may somehow get impacted though.
>> 
>> Well, it should still stay on the rbtree as long as it has packets
>> queued. We don't have a check anywhere that reschedules TXQs whose v_t
>> drops below global v_t...
>> 
>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>> will
>>>> not clear out an entire queue with multiple packets in one go. But 
>>>> you
>>>> are right that it could conceivably drop the last packet in a queue.
>>>> 
>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>> (i.e.,
>>>>>> between a pair of ieee80211_txq_schedule_start/end). So if we just
>>>>>> walk
>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>> 
>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to 
>>>>>> have
>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>> 
>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>> being
>>>>>> pointed to by schedule_pos[ac], and if it is, it updates 
>>>>>> schedule_pos
>>>>>> to
>>>>>> be the rb_next of the current value?
>>>>> Actually, if schedule_pos is updated to rb_next of the current 
>>>>> value,
>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>> finally pick the next node of the node we really want. Is it fine to
>>>>> update schedule_pos to NULL?
>>>> 
>>>> Hmm, yeah, good point.
>>>> 
>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>> scheduling round, that will make next_txq() "start over", and do
>>>> another
>>>> loop through the whole thing. I guess we may be able to hit a case where
>>>> things can oscillate back and forth between addition and removal
>>>> resulting in an infinite loop? Not sure, but at least I can't seem to
>>>> convince myself that this can't happen.
>>> 
>>> As the loop of next_txq under lock protection as below,
>>> 
>>> txq_schedule_start();
>>> while(txq=next_txq()){
>>> ...
>>> return_txq(txq);
>>> }
>>> txq_schedule_end();
>>> 
>>> I do not see any chance of addition, no?
>> 
>> As you noted in your other email, Felix reduced the locking. And yeah,
>> we need to rebase this series to also incorporate that. I figure I can
>> send an updated version of the first patch in the series once we've
>> worked out the remaining issues with your follow-up patches.
>> 
> Oh, I was thinking we were discussing without locking reduced. Yes, I 
> also agree there might be a case causing infinite loop. With locking 
> reduced, the tree can be adjusted between next_txq() and return_txq() in 
> the loop situation. For further discussion, let's consider,
> 1) the tree starts like:
>         A->B->C->D->E
> 2) then next_txq() returns A for dequeuing
> 3) driver dequeues A and drains A without any active txq lock held, meaning
> the tree could be changed upon Tx completion.
> 4) then in return_txq(), the tree could be,
>         i   A->B->C->D->E (A is empty, and maybe soon be added back 
> before the loop end)
>         ii  B->C->A->D->E (A is empty, and maybe soon be added back 
> before the loop end)
>         iii B->C->D->E->A (A is empty, and maybe soon be added back 
> before the loop end)
>
> with this change:
>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>
> For case i, local->schedule_pos[ac] is rb_next(A), which is B, and in
> next_txq(), rb_next(B) is what we return, which is actually C, so B is
> skipped, no?
>
> Similarly for case ii, we skip B, C, D.

Yup, I think you're right. But we can fix this by making
ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
resort_txq() acts on the txq that's currently in schedule_pos, it will
update schedule_pos with the same rb_next(node) ?: rb_prev(node);
(optionally after checking that the position of the node is actually
going to change).

> Also I am wondering if there will be some SMP issues relating with 
> local->schedule_pos[ac].

Not sure what you mean by this?

>>> In ath10k, we will usually push packets of first txq as many as we can
>>> until it is drained and then move to the next one. So if a txq gets
>>> removed in the return_txq, it should always be the leftmost. And
>>> during this period, neither vt of any station or global vt can be
>>> updated due to lock protection.
>>> 
>>>> 
>>>> But in that case, we could fix it by just conditionally assigning
>>>> either
>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>> something like:
>>>> 
>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>> I am not sure I am getting your point. Still in next_txq,
>>> schedule_pos[ac] will lead us to the next node of the one we want.
>> 
>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>> scheduling round, while the latter ends the current round.
>> 
>> -Toke
>
> -- 
> Yibo


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 11:27                     ` Toke Høiland-Jørgensen
@ 2019-09-21 11:53                       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 11:53 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>> rbtree
>>>>>>>>>> gets
>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>> the
>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>> schedule_pos
>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering 
>>>>>>>>>> the
>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>> 
>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>> 
>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>> I'll
>>>>>>>>> add
>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>> first:
>>>>>>>>> 
>>>>>>>>>> ---
>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>> 
>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>> 
>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>> +
>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>> *rate,
>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>  {
>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>   *
>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>> will
>>>>>>>>>> be
>>>>>>>>>> added
>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>> eligible.
>>>>>>>>>> If a txq
>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>> after the
>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>   *
>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> Check
>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>   */
>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>> ac)
>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>> 
>>>>>>>>>>  /**
>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>> + *
>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>> + *
>>>>>>>>>> + */
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>> +
>>>>>>>>>> +/**
>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>> to
>>>>>>>>>> transmit
>>>>>>>>>>   *
>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>> allowed
>>>>>>>>>> to
>>>>>>>>>> transmit by
>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>> 
>>>>>>>>>>  	/* keep last! */
>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>> 
>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>> 
>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>> 
>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>  	}
>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>> 
>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +
>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>> 
>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw)
>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>> 
>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>> 
>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>> 
>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> 
>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>  		goto out;
>>>>>>>>>> 
>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> 
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>>  }
>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>> 
>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>> int
>>>>>>>>>> ac)
>>>>>>>>>> +{
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>> +
>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>> +				 candidate) {
>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>> +
>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>> +		else
>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>> remove_timer);
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +	int ac;
>>>>>>>>>> +
>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +	}
>>>>>>>>>> +
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +}
>>>>>>>>> 
>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>> off
>>>>>>>>> until this round):
>>>>>>>>> 
>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>> added
>>>>>>>>> to
>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>> list?
>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>> then
>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>> disconnects
>>>>>>>> without any notification.
>>>>>>> 
>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>> scheduler
>>>>>>> when
>>>>>>> they disconnect etc.
>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>> we
>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>> weight
>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>> is
>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>> in
>>>>>> the
>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>> txq
>>>>>> will
>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>> but
>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>> drained
>>>>>> or
>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>> weight
>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>> better
>>>>>> solution?
>>>>> 
>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>> the
>>>>> scenario you're describing would play out like this:
>>>>> 
>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>> gets
>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>> 
>>>>> 2. When we finally get to the point where this station gets another
>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>> packet
>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>> 
>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>> it
>>>>>    back on the rbtree.
>>>>> 
>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>> point
>>>>> of
>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>> a
>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>> it
>>>>> will turn out to be empty, and just be skipped over.
>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>> part
>>>> in CoDel algorithm.
>>> 
>>> Yup, think so. And you're welcome :)
>>> 
>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>> a
>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>> for
>>>>> transmission, that will stall TX.
>>>> Is it by design since its vt is more than global vt, right? The
>>>> latency
>>>> may somehow get impacted though.
>>> 
>>> Well, it should still stay on the rbtree as long as it has packets
>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>> v_t
>>> drops below global v_t...
>>> 
>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>> will
>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>> you
>>>>> are right that it could conceivably drop the last packet in a 
>>>>> queue.
>>>>> 
>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>> (i.e.,
>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>> walk
>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>> 
>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>> have
>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>> 
>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>> being
>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>> schedule_pos
>>>>>>> to
>>>>>>> be the rb_next of the current value?
>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>> value,
>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>> to
>>>>>> update schedule_pos to NULL?
>>>>> 
>>>>> Hmm, yeah, good point.
>>>>> 
>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>> another
>>>>> loop through the whole thing. I guess we may be able to hit a case 
>>>>> where
>>>>> things can oscillate back and forth between addition and removal
>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>> to
>>>>> convince myself that this can't happen.
>>>> 
>>>> As the loop of next_txq under lock protection as below,
>>>> 
>>>> txq_schedule_start();
>>>> while(txq=next_txq()){
>>>> ...
>>>> return_txq(txq);
>>>> }
>>>> txq_schedule_end();
>>>> 
>>>> I do not see any chance of addition, no?
>>> 
>>> As you noted in your other email, Felix reduced the locking. And 
>>> yeah,
>>> we need to rebase this series to also incorporate that. I figure I 
>>> can
>>> send an updated version of the first patch in the series once we've
>>> worked out the remaining issues with your follow-up patches.
>>> 
>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>> also agree there might be a case causing infinite loop. With locking
>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>> in
>> the loop situation. For further discussion, let's consider:
>> 1) the tree starts like:
>>         A->B->C->D->E
>> 2) then next_txq() returns A for dequeuing
>> 3) driver dequeues A and drains A without any active txq locked 
>> meaning
>> the tree could be changed upon Tx completion.
>> 4) then in return_txq(), the tree could be,
>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>> before the loop end)
>> 
>> with this change:
>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> 
>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>> next_txq(), rb_next(B) is what we returns which actually is C and B is
>> skipped, no?
>> 
>> Similar for case ii, we skip B, C, D.
> 
> Yup, I think you're right. But we can fix this by making
> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
> resort_txq() acts on the txq that's currently in schedule_pos, it will
> update schedule pos with the same rb_next(node) ?: rb_prev(node);
> (optionally after checking that the position of the node is actually
> going to change).
> 
>> Also I am wondering if there will be some SMP issues relating with
>> local->schedule_pos[ac].
> 
> Not sure what you mean by this?
> 
>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>> can
>>>> until it is drained and then move to the next one. So if a txq gets
>>>> removed in the return_txq, it should always be the leftmost. And
>>>> during this period, neither vt of any station or global vt can be
>>>> updated due to lock protection.
>>>> 
>>>>> 
>>>>> But in that case, we could fix it by just conditionally assigning
>>>>> either
>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>> something like:
>>>>> 
>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> I am not sure I am getting your point. Still in next_txq,
>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>> 
>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>> scheduling round, while the latter ends the current round.
>>> 
>>> -Toke
>> 
>> --
>> Yibo

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 11:53                       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 11:53 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>> rbtree
>>>>>>>>>> gets
>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>> the
>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>> schedule_pos
>>>>>>>>>> not leading to the second txq in the rbtree. Thus, deferring 
>>>>>>>>>> the
>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>> 
>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>> 
>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>> I'll
>>>>>>>>> add
>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>> first:
>>>>>>>>> 
>>>>>>>>>> ---
>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>> 
>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>> 
>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>> +
>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>> *rate,
>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>  {
>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>   *
>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>> will
>>>>>>>>>> be
>>>>>>>>>> added
>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>> eligible.
>>>>>>>>>> If a txq
>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>> after the
>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>   *
>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> Check
>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>   */
>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>> ac)
>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>> 
>>>>>>>>>>  /**
>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>> + *
>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>> + *
>>>>>>>>>> + */
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>> +
>>>>>>>>>> +/**
>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>> to
>>>>>>>>>> transmit
>>>>>>>>>>   *
>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>> allowed
>>>>>>>>>> to
>>>>>>>>>> transmit by
>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>> 
>>>>>>>>>>  	/* keep last! */
>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>> 
>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>> 
>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>> 
>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>  	}
>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>> 
>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +
>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>> 
>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw)
>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>> 
>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>> 
>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>> 
>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> 
>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>  		goto out;
>>>>>>>>>> 
>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> 
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>>  }
>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>> 
>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>> int
>>>>>>>>>> ac)
>>>>>>>>>> +{
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>> +
>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>> +				 candidate) {
>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>> +
>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>> +		else
>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>> remove_timer);
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +	int ac;
>>>>>>>>>> +
>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +	}
>>>>>>>>>> +
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +}
>>>>>>>>> 
>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>> off
>>>>>>>>> until this round):
>>>>>>>>> 
>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>> added
>>>>>>>>> to
>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>> list?
>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>> then
>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>> disconnects
>>>>>>>> without any notification.
>>>>>>> 
>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>> scheduler
>>>>>>> when
>>>>>>> they disconnect etc.
>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>> we
>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>> weight
>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>> is
>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>> in
>>>>>> the
>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>> txq
>>>>>> will
>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>> but
>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>> drained
>>>>>> or
>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>> weight
>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>> better
>>>>>> solution?
>>>>> 
>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>> the
>>>>> scenario you're describing would play out like this:
>>>>> 
>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>> gets
>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>> 
>>>>> 2. When we finally get to the point where this station gets another
>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>> packet
>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>> 
>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>> it
>>>>>    back on the rbtree.
>>>>> 
>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>> point
>>>>> of
>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>> a
>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>> it
>>>>> will turn out to be empty, and just be skipped over.
>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>> part
>>>> in CoDel algorithm.
>>> 
>>> Yup, think so. And you're welcome :)
>>> 
>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>> a
>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>> for
>>>>> transmission, that will stall TX.
>>>> Is it by design since its vt is more than global vt, right? The
>>>> latency
>>>> may somehow get impacted though.
>>> 
>>> Well, it should still stay on the rbtree as long as it has packets
>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>> v_t
>>> drops below global v_t...
>>> 
>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>> will
>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>> you
>>>>> are right that it could conceivably drop the last packet in a 
>>>>> queue.
>>>>> 
>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>> (i.e.,
>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>> walk
>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>> 
>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>> have
>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>> 
>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>> being
>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>> schedule_pos
>>>>>>> to
>>>>>>> be the rb_next of the current value?
>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>> value,
>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>> to
>>>>>> update schedule_pos to NULL?
>>>>> 
>>>>> Hmm, yeah, good point.
>>>>> 
>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>> another
>>>>> loop through the whole thing. I guess we may be able to hit a case 
>>>>> where
>>>>> things can oscillate back and forth between addition and removal
>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>> to
>>>>> convince myself that this can't happen.
>>>> 
>>>> As the loop of next_txq under lock protection as below,
>>>> 
>>>> txq_schedule_start();
>>>> while(txq=next_txq()){
>>>> ...
>>>> return_txq(txq);
>>>> }
>>>> txq_schedule_end();
>>>> 
>>>> I do not see any chance of addition, no?
>>> 
>>> As you noted in your other email, Felix reduced the locking. And 
>>> yeah,
>>> we need to rebase this series to also incorporate that. I figure I 
>>> can
>>> send an updated version of the first patch in the series once we've
>>> worked out the remaining issues with your follow-up patches.
>>> 
>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>> also agree there might be a case causing infinite loop. With locking
>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>> in
>> the loop situation. For further discussion, let's consider:
>> 1) the tree starts like:
>>         A->B->C->D->E
>> 2) then next_txq() returns A for dequeuing
>> 3) driver dequeues A and drains A without any active txq locked 
>> meaning
>> the tree could be changed upon Tx completion.
>> 4) then in return_txq(), the tree could be,
>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>> before the loop end)
>> 
>> with this change:
>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> 
>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>> next_txq(), rb_next(B) is what we returns which actually is C and B is
>> skipped, no?
>> 
>> Similar for case ii, we skip B, C, D.
> 
> Yup, I think you're right. But we can fix this by making
> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
> resort_txq() acts on the txq that's currently in schedule_pos, it will
> update schedule pos with the same rb_next(node) ?: rb_prev(node);
> (optionally after checking that the position of the node is actually
> going to change).
> 
>> Also I am wondering if there will be some SMP issues relating with
>> local->schedule_pos[ac].
> 
> Not sure what you mean by this?
> 
>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>> can
>>>> until it is drained and then move to the next one. So if a txq gets
>>>> removed in the return_txq, it should always be the leftmost. And
>>>> during this period, neither vt of any station or global vt can be
>>>> updated due to lock protection.
>>>> 
>>>>> 
>>>>> But in that case, we could fix it by just conditionally assigning
>>>>> either
>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>> something like:
>>>>> 
>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> I am not sure I am getting your point. Still in next_txq,
>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>> 
>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>> scheduling round, while the latter ends the current round.
>>> 
>>> -Toke
>> 
>> --
>> Yibo

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 11:27                     ` Toke Høiland-Jørgensen
@ 2019-09-21 12:22                       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 12:22 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>> rbtree
>>>>>>>>>> gets
>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>> the
>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>> schedule_pos
>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defer 
>>>>>>>>>> the
>>>>>>>>>> removal until right before the end of this schedule round.
>>>>>>>>>> 
>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>> 
>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>> I'll
>>>>>>>>> add
>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>> first:
>>>>>>>>> 
>>>>>>>>>> ---
>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>> 
>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>> 
>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>> +
>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>> *rate,
>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>  {
>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>   *
>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>> will
>>>>>>>>>> be
>>>>>>>>>> added
>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>> eligible.
>>>>>>>>>> If a txq
>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>> after the
>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>   *
>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> Check
>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>   */
>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>> ac)
>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>> 
>>>>>>>>>>  /**
>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>> + *
>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>> + *
>>>>>>>>>> + */
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>> +
>>>>>>>>>> +/**
>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>> to
>>>>>>>>>> transmit
>>>>>>>>>>   *
>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>> allowed
>>>>>>>>>> to
>>>>>>>>>> transmit by
>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>> 
>>>>>>>>>>  	/* keep last! */
>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>> 
>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>> 
>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>> 
>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>  	}
>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>> 
>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +
>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>> 
>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw)
>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>> 
>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>> 
>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>> 
>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> 
>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>  		goto out;
>>>>>>>>>> 
>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> 
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>>  }
>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>> 
>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>> int
>>>>>>>>>> ac)
>>>>>>>>>> +{
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>> +
>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>> +				 candidate) {
>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>> +
>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>> +		else
>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>> remove_timer);
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +	int ac;
>>>>>>>>>> +
>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +	}
>>>>>>>>>> +
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +}
>>>>>>>>> 
>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>> off
>>>>>>>>> until this round):
>>>>>>>>> 
>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>> added
>>>>>>>>> to
>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>> list?
>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>> then
>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>> disconnects
>>>>>>>> without any notification.
>>>>>>> 
>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>> scheduler
>>>>>>> when
>>>>>>> they disconnect etc.
>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>> we
>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>> weight
>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>> is
>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>> in
>>>>>> the
>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>> txq
>>>>>> will
>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>> but
>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>> drained
>>>>>> or
>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>> weight
>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>> better
>>>>>> solution?
>>>>> 
>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>> the
>>>>> scenario you're describing would play out like this:
>>>>> 
>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>> gets
>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>> 
>>>>> 2. When we finally get to the point where this station gets another
>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>> packet
>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>> 
>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>> it
>>>>>    back on the rbtree.
>>>>> 
>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>> point
>>>>> of
>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>> a
>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>> it
>>>>> will turn out to be empty, and just be skipped over.
>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>> part
>>>> in CoDel algorithm.
>>> 
>>> Yup, think so. And you're welcome :)
>>> 
>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>> a
>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>> for
>>>>> transmission, that will stall TX.
>>>> Is that by design, since its vt is more than the global vt? The
>>>> latency
>>>> may somehow get impacted though.
>>> 
>>> Well, it should still stay on the rbtree as long as it has packets
>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>> v_t
>>> drops below global v_t...
>>> 
>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>> will
>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>> you
>>>>> are right that it could conceivably drop the last packet in a 
>>>>> queue.
>>>>> 
>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>> (i.e.,
>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>> walk
>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>> 
>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>> have
>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>> 
>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>> being
>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>> schedule_pos
>>>>>>> to
>>>>>>> be the rb_next of the current value?
>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>> value,
>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>> to
>>>>>> update schedule_pos to NULL?
>>>>> 
>>>>> Hmm, yeah, good point.
>>>>> 
>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>> another
>>>>> loop through the whole thing. I guess we may be able to hit a case 
>>>>> where
>>>>> things can oscillate back and forth between addition and removal
>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>> to
>>>>> convince myself that this can't happen.
>>>> 
>>>> As the loop of next_txq under lock protection as below,
>>>> 
>>>> txq_schedule_start();
>>>> while(txq=next_txq()){
>>>> ...
>>>> return_txq(txq);
>>>> }
>>>> txq_schedule_end();
>>>> 
>>>> I do not see any chance of addition, no?
>>> 
>>> As you noted in your other email, Felix reduced the locking. And 
>>> yeah,
>>> we need to rebase this series to also incorporate that. I figure I 
>>> can
>>> send an updated version of the first patch in the series once we've
>>> worked out the remaining issues with your follow-up patches.
>>> 
>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>> also agree there might be a case causing infinite loop. With locking
>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>> in
>> the loop situation. For further discussion, let's consider,
>> 1) the tree starts like:
>>         A->B->C->D->E
>> 2) then next_txq() returns A for dequeuing
>> 3) driver dequeues A and drains A without any active txq locked, 
>> meaning
>> the tree could be changed upon Tx completion.
>> 4) then in return_txq(), the tree could be,
>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>> before the loop end)
>> 
>> with this change:
>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> 
>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>> next_txq(), rb_next(B) is what we return, which is actually C, and B is
>> skipped, no?
>> 
>> Similarly for case ii, we skip B, C, D.
> 
> Yup, I think you're right. But we can fix this by making
> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
> resort_txq() acts on the txq that's currently in schedule_pos, it will
> update schedule pos with the same rb_next(node) ?: rb_prev(node);
> (optionally after checking that the position of the node is actually
> going to change).
Sorry, please ignore the last email; it was sent by mistake.

I don't think that makes any difference compared with doing it in 
unschedule_txq(). For case i, next_txq() still ends up picking C. In 
next_txq(), schedule_pos means the previous candidate node, whereas with 
your change it looks like schedule_pos is the current candidate node instead.



>> Also I am wondering if there will be some SMP issues relating with
>> local->schedule_pos[ac].
> 
> Not sure what you mean by this?
My bad. Please ignore this.


> 
>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>> can
>>>> until it is drained and then move to the next one. So if a txq gets
>>>> removed in the return_txq, it should always be the leftmost. And
>>>> during this period, neither vt of any station or global vt can be
>>>> updated due to lock protection.
>>>> 
>>>>> 
>>>>> But in that case, we could fix it by just conditionally assigning
>>>>> either
>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>> something like:
>>>>> 
>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> I am not sure I am getting your point. Still in next_txq,
>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>> 
>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>> scheduling round, while the latter ends the current round.
>>> 
>>> -Toke
>> 
>> --
>> Yibo

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 12:22                       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 12:22 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>> rbtree
>>>>>>>>>> gets
>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>> the
>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>> schedule_pos
>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defer 
>>>>>>>>>> the
>>>>>>>>>> removal until right before the end of this schedule round.
>>>>>>>>>> 
>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>> 
>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>> I'll
>>>>>>>>> add
>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>> first:
>>>>>>>>> 
>>>>>>>>>> ---
>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>> 
>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>> 
>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>> +
>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>> *rate,
>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>  {
>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>   *
>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>> will
>>>>>>>>>> be
>>>>>>>>>> added
>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>> eligible.
>>>>>>>>>> If a txq
>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>> after the
>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>   *
>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>> Check
>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>   */
>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>> ac)
>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>> 
>>>>>>>>>>  /**
>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>> + *
>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>> + *
>>>>>>>>>> + */
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>> +
>>>>>>>>>> +/**
>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>> to
>>>>>>>>>> transmit
>>>>>>>>>>   *
>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>> allowed
>>>>>>>>>> to
>>>>>>>>>> transmit by
>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>> 
>>>>>>>>>>  	/* keep last! */
>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>> 
>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>> 
>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>> 
>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>  	}
>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>> 
>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +
>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>> 
>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw)
>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>> 
>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>  #endif
>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>> 
>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>> 
>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> 
>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>  		goto out;
>>>>>>>>>> 
>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>> 
>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>  }
>>>>>>>>>> 
>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>> ieee80211_hw
>>>>>>>>>> *hw,
>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>> 
>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>> +
>>>>>>>>>>  }
>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>> 
>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>> int
>>>>>>>>>> ac)
>>>>>>>>>> +{
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +
>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>> +
>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>> +				 candidate) {
>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>> +
>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>> +		else
>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>> +	}
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>> +{
>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>> remove_timer);
>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>> +	int ac;
>>>>>>>>>> +
>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>> +	}
>>>>>>>>>> +
>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>> +		  jiffies +
>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>> +}
>>>>>>>>> 
>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>> off
>>>>>>>>> until this round):
>>>>>>>>> 
>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>> added
>>>>>>>>> to
>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>> list?
>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>> then
>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>> disconnects
>>>>>>>> without any notification.
>>>>>>> 
>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>> scheduler
>>>>>>> when
>>>>>>> they disconnect etc.
>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>> we
>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>> weight
>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>> is
>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>> in
>>>>>> the
>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>> txq
>>>>>> will
>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>> but
>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>> drained
>>>>>> or
>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>> weight
>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>> better
>>>>>> solution?
>>>>> 
>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>> the
>>>>> scenario you're describing would play out like this:
>>>>> 
>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>> gets
>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>> 
>>>>> 2. When we finally get to the point where this station gets another
>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>> packet
>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>> 
>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>> it
>>>>>    back on the rbtree.
>>>>> 
>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>> point
>>>>> of
>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>> a
>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>> it
>>>>> will turn out to be empty, and just be skipped over.
>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>> part
>>>> in CoDel algorithm.
>>> 
>>> Yup, think so. And you're welcome :)
>>> 
>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>> a
>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>> for
>>>>> transmission, that will stall TX.
>>>> Is it by design since its vt is more than global vt, right? The
>>>> latency
>>>> may somehow get impacted though.
>>> 
>>> Well, it should still stay on the rbtree as long as it has packets
>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>> v_t
>>> drops below global v_t...
>>> 
>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>> will
>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>> you
>>>>> are right that it could conceivably drop the last packet in a 
>>>>> queue.
>>>>> 
>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>> (i.e.,
>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>> walk
>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>> 
>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>> have
>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>> 
>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>> being
>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>> schedule_pos
>>>>>>> to
>>>>>>> be the rb_next of the current value?
>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>> value,
>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>> to
>>>>>> update schedule_pos to NULL?
>>>>> 
>>>>> Hmm, yeah, good point.
>>>>> 
>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>> another
>>>>> loop through the whole thing. I guess we may be able hit a case 
>>>>> where
>>>>> things can oscillate back and forth between addition and removal
>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>> to
>>>>> convince myself that this can't happen.
>>>> 
>>>> As the loop of next_txq under lock protection as below,
>>>> 
>>>> txq_schedule_start();
>>>> while(txq=next_txq()){
>>>> ...
>>>> return_txq(txq);
>>>> }
>>>> txq_schedule_end();
>>>> 
>>>> I do not see any chance of addition, no?
>>> 
>>> As you noted in your other email, Felix reduced the locking. And 
>>> yeah,
>>> we need to rebase this series to also incorporate that. I figure I 
>>> can
>>> send an updated version of the first patch in the series once we've
>>> worked out the remaining issues with your follow-up patches.
>>> 
>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>> also agree there might be a case causing infinite loop. With locking
>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>> in
>> the loop situation. For further discussion, let's consider,
>> 1) the tree starts like:
>>         A->B->C->D->E
>> 2) then next_txq() returns A for dequeuing
>> 3) driver dequeues A and drains A without any active txq locked,
>> meaning
>> the tree could be changed upon Tx completion.
>> 4) then in return_txq(), the tree could be,
>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>> before the loop end)
>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>> before the loop end)
>> 
>> with this change:
>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>> 
>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>> next_txq(), rb_next(B) is what we returns which actually is C and B is
>> skipped, no?
>> 
>> Similarly for case ii, we skip B, C, D.
> 
> Yup, I think you're right. But if we can fix this by making
> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
> resort_txq() acts on the txq that's currently in schedule_pos, it will
> update schedule pos with the same rb_next(node) ?: rb_prev(node);
> (optionally after checking that the position of the node is actually
> going to change).
Sorry, please ignore the last email; it was sent by mistake.

I don't think it makes any difference compared with doing that in
unschedule_txq(). For case i, next_txq() still ends up picking C. In
next_txq(), schedule_pos means the previous candidate node, whereas with
your change it looks like schedule_pos is the current candidate node instead.



>> Also I am wondering if there will be some SMP issues relating with
>> local->schedule_pos[ac].
> 
> Not sure what you mean by this?
My bad. Please ignore this.


> 
>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>> can
>>>> until it is drained and then move to the next one. So if a txq gets
>>>> removed in the return_txq, it should always be the leftmost. And
>>>> during this period, neither vt of any station or global vt can be
>>>> updated due to lock protection.
>>>> 
>>>>> 
>>>>> But in that case, we could fix it by just conditionally assigning
>>>>> either
>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>> something like:
>>>>> 
>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> I am not sure I am getting your point. Still in next_txq,
>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>> 
>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>> scheduling round, while the latter ends the current round.
>>> 
>>> -Toke
>> 
>> --
>> Yibo

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 12:22                       ` Yibo Zhao
@ 2019-09-21 13:02                         ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 13:02 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>>> rbtree
>>>>>>>>>>> gets
>>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>>> the
>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>> schedule_pos
>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering 
>>>>>>>>>>> the
>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>> 
>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>> 
>>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>>> I'll
>>>>>>>>>> add
>>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>>> first:
>>>>>>>>>> 
>>>>>>>>>>> ---
>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>> 
>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>> 
>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>> +
>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>> *rate,
>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>  {
>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>   *
>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>>> will
>>>>>>>>>>> be
>>>>>>>>>>> added
>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>> eligible.
>>>>>>>>>>> If a txq
>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>> after the
>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>   *
>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>> Check
>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>   */
>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>>> ac)
>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>> 
>>>>>>>>>>>  /**
>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>> + *
>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>> + *
>>>>>>>>>>> + */
>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>> +
>>>>>>>>>>> +/**
>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>>> to
>>>>>>>>>>> transmit
>>>>>>>>>>>   *
>>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>>> allowed
>>>>>>>>>>> to
>>>>>>>>>>> transmit by
>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>> 
>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>> 
>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>> 
>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>> 
>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>  	}
>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>> 
>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>> +
>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>> 
>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw)
>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>> 
>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>  #endif
>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>> 
>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw,
>>>>>>>>>>> 
>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> 
>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>> +
>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>  		goto out;
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>  }
>>>>>>>>>>> 
>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>> +
>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>> +	}
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>> 
>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>  }
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw,
>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> 
>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>> +
>>>>>>>>>>>  }
>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>> 
>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>>> int
>>>>>>>>>>> ac)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>> +
>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>> +
>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>> +
>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>> +		else
>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>> +	}
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>> remove_timer);
>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>> +	int ac;
>>>>>>>>>>> +
>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> +	}
>>>>>>>>>>> +
>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>> +}
>>>>>>>>>> 
>>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>>> off
>>>>>>>>>> until this round):
>>>>>>>>>> 
>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>> added
>>>>>>>>>> to
>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>>> list?
>>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>>> then
>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>> disconnects
>>>>>>>>> without any notification.
>>>>>>>> 
>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>>> scheduler
>>>>>>>> when
>>>>>>>> they disconnect etc.
>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>>> we
>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>> weight
>>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>>> is
>>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>>> in
>>>>>>> the
>>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>>> txq
>>>>>>> will
>>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>>> but
>>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>>> drained
>>>>>>> or
>>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>>> weight
>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>> better
>>>>>>> solution?
>>>>>> 
>>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>>> the
>>>>>> scenario you're describing would play out like this:
>>>>>> 
>>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>>> gets
>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>> 
>>>>>> 2. When we finally get to the point where this station gets another
>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>> packet
>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>> 
>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>>> it
>>>>>>    back on the rbtree.
>>>>>> 
>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>>> point
>>>>>> of
>>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>>> a
>>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>>> it
>>>>>> will turn out to be empty, and just be skipped over.
>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>> part
>>>>> in CoDel algorithm.
>>>> 
>>>> Yup, think so. And you're welcome :)
>>>> 
>>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>>> a
>>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>>> for
>>>>>> transmission, that will stall TX.
>>>>> Is it by design since its vt is more than global vt, right? The
>>>>> latency
>>>>> may somehow get impacted though.
>>>> 
>>>> Well, it should still stay on the rbtree as long as it has packets
>>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>>> v_t
>>>> drops below global v_t...
>>>> 
>>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>>> will
>>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>>> you
>>>>>> are right that it could conceivably drop the last packet in a 
>>>>>> queue.
>>>>>> 
>>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>>> (i.e.,
>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>>> walk
>>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>>> 
>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>>> have
>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>> 
>>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>>> being
>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>> schedule_pos
>>>>>>>> to
>>>>>>>> be the rb_next of the current value?
>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>> value,
>>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>>> to
>>>>>>> update schedule_pos to NULL?
>>>>>> 
>>>>>> Hmm, yeah, good point.
>>>>>> 
>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>> another
>>>>>> loop through the whole thing. I guess we may be able hit a case 
>>>>>> where
>>>>>> things can oscillate back and forth between addition and removal
>>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>>> to
>>>>>> convince myself that this can't happen.
>>>>> 
>>>>> As the loop of next_txq under lock protection as below,
>>>>> 
>>>>> txq_schedule_start();
>>>>> while(txq=next_txq()){
>>>>> ...
>>>>> return_txq(txq);
>>>>> }
>>>>> txq_schedule_end();
>>>>> 
>>>>> I do not see any chance of addition, no?
>>>> 
>>>> As you noted in your other email, Felix reduced the locking. And 
>>>> yeah,
>>>> we need to rebase this series to also incorporate that. I figure I 
>>>> can
>>>> send an updated version of the first patch in the series once we've
>>>> worked out the remaining issues with your follow-up patches.
>>>> 
>>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>>> also agree there might be a case causing infinite loop. With locking
>>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>>> in
>>> the loop situation. For further discussion, let's consider,
>>> 1) the tree starts like:
>>>         A->B->C->D->E
>>> 2) then next_txq() returns A for dequeuing
>>> 3) driver dequeues A and drains A without any active txq locked,
>>> meaning
>>> the tree could be changed upon Tx completion.
>>> 4) then in return_txq(), the tree could be,
>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>> before the loop end)
>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>> before the loop end)
>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>> before the loop end)
>>> 
>>> with this change:
>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>> 
>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>> next_txq(), rb_next(B) is what we returns which actually is C and B is
>>> skipped, no?
>>> 
>>> Similarly for case ii, we skip B, C, D.
>> 
>> Yup, I think you're right. But if we can fix this by making
>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
>> resort_txq() acts on the txq that's currently in schedule_pos, it will
>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>> (optionally after checking that the position of the node is actually
>> going to change).
> Sorry, please ignore the last email; it was sent by mistake.
>
> I don't think it makes any difference compared with doing that in
> unschedule_txq(). For case i, next_txq() still ends up picking C. In
> next_txq(), schedule_pos means the previous candidate node, whereas with
> your change it looks like schedule_pos is the current candidate node instead.

Hmm, that was not actually what I was thinking, but yeah I think you're
right that it would be easier to just change it so schedule_pos is
pointing to the next and not the current txq we want to schedule.

We'd still need a check in resort_txq() then, but it would make it safe
to unschedule in return_txq()...

>>> Also I am wondering if there will be some SMP issues relating with
>>> local->schedule_pos[ac].
>> 
>> Not sure what you mean by this?
> My bad. Please ignore this.
>
>
>> 
>>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>>> can
>>>>> until it is drained and then move to the next one. So if a txq gets
>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>> during this period, neither vt of any station or global vt can be
>>>>> updated due to lock protection.
>>>>> 
>>>>>> 
>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>> either
>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>> something like:
>>>>>> 
>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>> 
>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>> scheduling round, while the latter ends the current round.
>>>> 
>>>> -Toke
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 13:02                         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 13:02 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the 
>>>>>>>>>>> rbtree
>>>>>>>>>>> gets
>>>>>>>>>>> removed from rbtree immediately in the ieee80211_return_txq(),
>>>>>>>>>>> the
>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>> schedule_pos
>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering 
>>>>>>>>>>> the
>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>> 
>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>> 
>>>>>>>>>> I didn't write this patch, so please don't use my sign-off. 
>>>>>>>>>> I'll
>>>>>>>>>> add
>>>>>>>>>> ack or review tags as appropriate in reply; but a few comments
>>>>>>>>>> first:
>>>>>>>>>> 
>>>>>>>>>>> ---
>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>> 
>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>> 
>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>> +
>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>> *rate,
>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>  {
>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>   *
>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, it
>>>>>>>>>>> will
>>>>>>>>>>> be
>>>>>>>>>>> added
>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>> eligible.
>>>>>>>>>>> If a txq
>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>> after the
>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void ieee80211_txq_schedule_start(struct
>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>   *
>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>> Check
>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>   */
>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 
>>>>>>>>>>> ac)
>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>> 
>>>>>>>>>>>  /**
>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>> + *
>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>> + *
>>>>>>>>>>> + */
>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>> +
>>>>>>>>>>> +/**
>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is allowed 
>>>>>>>>>>> to
>>>>>>>>>>> transmit
>>>>>>>>>>>   *
>>>>>>>>>>>   * This function is used to check whether given txq is 
>>>>>>>>>>> allowed
>>>>>>>>>>> to
>>>>>>>>>>> transmit by
>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>> 
>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>> 
>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>> 
>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>> 
>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>  	}
>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>> 
>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 0);
>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>> +
>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>> 
>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw)
>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>> 
>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>  #endif
>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>> 
>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw,
>>>>>>>>>>> 
>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> 
>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>> +
>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>  		goto out;
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>  }
>>>>>>>>>>> 
>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>> +
>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>> +	}
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>> 
>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>  }
>>>>>>>>>>> 
>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>> *hw,
>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>> 
>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>> +
>>>>>>>>>>>  }
>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>> 
>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local, 
>>>>>>>>>>> int
>>>>>>>>>>> ac)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>> +
>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>> +
>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, &local->remove_list[ac],
>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>> +
>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>> +		else
>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>> +	}
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>> +{
>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>> remove_timer);
>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>> +	int ac;
>>>>>>>>>>> +
>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>> +	}
>>>>>>>>>>> +
>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>> +}
>>>>>>>>>> 
>>>>>>>>>> I'll ask the same as I did last time (where you told me to hold
>>>>>>>>>> off
>>>>>>>>>> until this round):
>>>>>>>>>> 
>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>> added
>>>>>>>>>> to
>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear the
>>>>>>>>>> list?
>>>>>>>>> Is it possible that a txq is not added to the remove list but 
>>>>>>>>> then
>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>> disconnects
>>>>>>>>> without any notification.
>>>>>>>> 
>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>> __unschedule_txq(), that should remove stations from the 
>>>>>>>> scheduler
>>>>>>>> when
>>>>>>>> they disconnect etc.
>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, say,
>>>>>>> we
>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>> weight
>>>>>>> compared with that of others. Suppose, after its chance of Tx, it 
>>>>>>> is
>>>>>>> most likely to be placed in the rightmost(still has some packets 
>>>>>>> in
>>>>>>> the
>>>>>>> txq) and no more incoming data for it. The remaining packets in 
>>>>>>> txq
>>>>>>> will
>>>>>>> be dropped due to timeout algo in codel(correct me if I am wrong)
>>>>>>> but
>>>>>>> this empty txq will stay on the rbtree until other txqs get 
>>>>>>> drained
>>>>>>> or
>>>>>>> global vt catch up with its vt. The staying time could be long if
>>>>>>> weight
>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>> better
>>>>>>> solution?
>>>>>> 
>>>>>> Ah, I see what you mean. No, I don't think this will be a problem;
>>>>>> the
>>>>>> scenario you're describing would play out like this:
>>>>>> 
>>>>>> 1. Station ends transmitting, still has a single packet queued, 
>>>>>> gets
>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>> 
>>>>>> 2. When we finally get to the point where this station gets another
>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>> packet
>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>> 
>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not put 
>>>>>> it
>>>>>>    back on the rbtree.
>>>>>> 
>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the 
>>>>>> point
>>>>>> of
>>>>>> packet dequeue. But even if an empty queue stays on the rbtree for 
>>>>>> a
>>>>>> while, there is no harm in that: eventually it will get its turn, 
>>>>>> it
>>>>>> will turn out to be empty, and just be skipped over.
>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>> part
>>>>> in CoDel algorithm.
>>>> 
>>>> Yup, think so. And you're welcome :)
>>>> 
>>>>>> The issue we need to be concerned about is the opposite: If we have 
>>>>>> a
>>>>>> queue that *does* have packets queued, but which is *not* scheduled
>>>>>> for
>>>>>> transmission, that will stall TX.
>>>>> Is it by design since its vt is more than global vt, right? The
>>>>> latency
>>>>> may somehow get impacted though.
>>>> 
>>>> Well, it should still stay on the rbtree as long as it has packets
>>>> queued. We don't have a check anywhere that reschedules TXQs whose 
>>>> v_t
>>>> drops below global v_t...
>>>> 
>>>>>> [0] CoDel in most cases only drops a single packet at a time, so it
>>>>>> will
>>>>>> not clear out an entire queue with multiple packets in one go. But
>>>>>> you
>>>>>> are right that it could conceivably drop the last packet in a 
>>>>>> queue.
>>>>>> 
>>>>>>>> We only need to defer removal inside a single "scheduling round"
>>>>>>>> (i.e.,
>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we just
>>>>>>>> walk
>>>>>>>> the remove list in schedule_end(), that should be enough, no?
>>>>>>>> 
>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just to
>>>>>>>> have
>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>> 
>>>>>>>> I.e., unschedule_txq checks if the txq being removed is currently
>>>>>>>> being
>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>> schedule_pos
>>>>>>>> to
>>>>>>>> be the rb_next of the current value?
>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>> value,
>>>>>>> then in the next_txq() where we are going to use rb_next again and
>>>>>>> finally pick the next node of the node we really want. Is it fine 
>>>>>>> to
>>>>>>> update schedule_pos to NULL?
>>>>>> 
>>>>>> Hmm, yeah, good point.
>>>>>> 
>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>> another
>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>> where
>>>>>> things can oscillate back and forth between addition and removal
>>>>>> resulting in an infinite loop? Not sure, but at least I can't seem 
>>>>>> to
>>>>>> convince myself that this can't happen.
>>>>> 
>>>>> As the loop of next_txq under lock protection as below,
>>>>> 
>>>>> txq_schedule_start();
>>>>> while(txq=next_txq()){
>>>>> ...
>>>>> return_txq(txq);
>>>>> }
>>>>> txq_schedule_end();
>>>>> 
>>>>> I do not see any chance of addition, no?
>>>> 
>>>> As you noted in your other email, Felix reduced the locking. And 
>>>> yeah,
>>>> we need to rebase this series to also incorporate that. I figure I 
>>>> can
>>>> send an updated version of the first patch in the series once we've
>>>> worked out the remaining issues with your follow-up patches.
>>>> 
>>> Oh, I was thinking we were discussing without locking reduced. Yes, I
>>> also agree there might be a case causing infinite loop. With locking
>>> reduced, the tree can be adjusted between next_txq() and return_txq() 
>>> in
>>> the loop situation. For further discussion, let's consider,
>>> 1) the tree starts like:
>>>         A->B->C->D->E
>>> 2) then next_txq() returns A for dequeuing
>>> 3) driver dequeues A and drains A without any active txq locked,
>>> meaning
>>> the tree could be changed upon Tx completion.
>>> 4) then in return_txq(), the tree could be,
>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>> before the loop end)
>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>> before the loop end)
>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>> before the loop end)
>>> 
>>> with this change:
>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>> 
>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>> next_txq(), rb_next(B) is what we returns which actually is C and B is
>>> skipped, no?
>>> 
>>> Similarly, for case ii, we skip B, C, D.
>> 
>> Yup, I think you're right. But we can fix this by making
>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., if
>> resort_txq() acts on the txq that's currently in schedule_pos, it will
>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>> (optionally after checking that the position of the node is actually
>> going to change).
> Sorry, please ignore the last email; it was sent by mistake.
>
> I don't think it makes any difference with that in unschedule_txq(). For 
> case i, it finally picks C as well in next_txq(). For next_txq(), 
> schedule_pos means previous candidate node whereas with your change, it 
> looks like schedule_pos is current candidate node instead.

Hmm, that was not actually what I was thinking, but yeah I think you're
right that it would be easier to just change it so schedule_pos is
pointing to the next and not the current txq we want to schedule.

We'd still need a check in resort_txq() then, but it would make it safe
to unschedule in return_txq()...

>>> Also I am wondering if there will be some SMP issues relating with
>>> local->schedule_pos[ac].
>> 
>> Not sure what you mean by this?
> My bad. Please ignore this.
>
>
>> 
>>>>> In ath10k, we will usually push packets of first txq as many as we 
>>>>> can
>>>>> until it is drained and then move to the next one. So if a txq gets
>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>> during this period, neither vt of any station or global vt can be
>>>>> updated due to lock protection.
>>>>> 
>>>>>> 
>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>> either
>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>> something like:
>>>>>> 
>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>> 
>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, vs
>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>> scheduling round, while the latter ends the current round.
>>>> 
>>>> -Toke
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 13:02                         ` Toke Høiland-Jørgensen
@ 2019-09-21 13:24                           ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 13:24 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>> rbtree
>>>>>>>>>>>> gets
>>>>>>>>>>>> removed from rbtree immediately in the 
>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>> the
>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defering
>>>>>>>>>>>> the
>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>> 
>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>> 
>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>> I'll
>>>>>>>>>>> add
>>>>>>>>>>> ack or review tags as appropriate in reply; but a few 
>>>>>>>>>>> comments
>>>>>>>>>>> first:
>>>>>>>>>>> 
>>>>>>>>>>>> ---
>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>> 
>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>> 
>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>> +
>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>> *rate,
>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>  {
>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>   *
>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, 
>>>>>>>>>>>> it
>>>>>>>>>>>> will
>>>>>>>>>>>> be
>>>>>>>>>>>> added
>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>>> eligible.
>>>>>>>>>>>> If a txq
>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>> after the
>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void 
>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>   *
>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>> Check
>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>   */
>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8
>>>>>>>>>>>> ac)
>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>> 
>>>>>>>>>>>>  /**
>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>> + *
>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>> + *
>>>>>>>>>>>> + */
>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>> +
>>>>>>>>>>>> +/**
>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is 
>>>>>>>>>>>> allowed
>>>>>>>>>>>> to
>>>>>>>>>>>> transmit
>>>>>>>>>>>>   *
>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>> allowed
>>>>>>>>>>>> to
>>>>>>>>>>>> transmit by
>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>> 
>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>> 
>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>> 
>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>> 
>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>  	}
>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>> 
>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 
>>>>>>>>>>>> 0);
>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>> +
>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw)
>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>> 
>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>  #endif
>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw,
>>>>>>>>>>>> 
>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> 
>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>> +
>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>  }
>>>>>>>>>>>> 
>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>  }
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw,
>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>>  }
>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>> 
>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local,
>>>>>>>>>>>> int
>>>>>>>>>>>> ac)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, 
>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>>> +
>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>> +		else
>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>> +}
>>>>>>>>>>> 
>>>>>>>>>>> I'll ask the same as I did last time (where you told me to 
>>>>>>>>>>> hold
>>>>>>>>>>> off
>>>>>>>>>>> until this round):
>>>>>>>>>>> 
>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>>> added
>>>>>>>>>>> to
>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear 
>>>>>>>>>>> the
>>>>>>>>>>> list?
>>>>>>>>>> Is it possible that a txq is not added to the remove list but
>>>>>>>>>> then
>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>> disconnects
>>>>>>>>>> without any notification.
>>>>>>>>> 
>>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>> scheduler
>>>>>>>>> when
>>>>>>>>> they disconnect etc.
>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, 
>>>>>>>> say,
>>>>>>>> we
>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>> weight
>>>>>>>> compared with that of others. Suppose, after its chance of Tx, 
>>>>>>>> it
>>>>>>>> is
>>>>>>>> most likely to be placed in the rightmost(still has some packets
>>>>>>>> in
>>>>>>>> the
>>>>>>>> txq) and no more incoming data for it. The remaining packets in
>>>>>>>> txq
>>>>>>>> will
>>>>>>>> be dropped due to timeout algo in codel(correct me if I am 
>>>>>>>> wrong)
>>>>>>>> but
>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>> drained
>>>>>>>> or
>>>>>>>> global vt catch up with its vt. The staying time could be long 
>>>>>>>> if
>>>>>>>> weight
>>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>>> better
>>>>>>>> solution?
>>>>>>> 
>>>>>>> Ah, I see what you mean. No, I don't think this will be a 
>>>>>>> problem;
>>>>>>> the
>>>>>>> scenario you're describing would play out like this:
>>>>>>> 
>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>> gets
>>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>>> 
>>>>>>> 2. When we finally get to the point where this station gets 
>>>>>>> another
>>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>>> packet
>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>> 
>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not 
>>>>>>> put
>>>>>>> it
>>>>>>>    back on the rbtree.
>>>>>>> 
>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>> point
>>>>>>> of
>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree 
>>>>>>> for
>>>>>>> a
>>>>>>> while, there is no harm in that: eventually it will get its turn,
>>>>>>> it
>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>>> part
>>>>>> in CoDel algorithm.
>>>>> 
>>>>> Yup, think so. And you're welcome :)
>>>>> 
>>>>>>> The issue we need to be concerned about is the opposite: If we 
>>>>>>> have
>>>>>>> a
>>>>>>> queue that *does* have packets queued, but which is *not* 
>>>>>>> scheduled
>>>>>>> for
>>>>>>> transmission, that will stall TX.
>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>> latency
>>>>>> may somehow get impacted though.
>>>>> 
>>>>> Well, it should still stay on the rbtree as long as it has packets
>>>>> queued. We don't have a check anywhere that reschedules TXQs whose
>>>>> v_t
>>>>> drops below global v_t...
>>>>> 
>>>>>>> [0] CoDel in most cases only drops a single packet at a time, so 
>>>>>>> it
>>>>>>> will
>>>>>>> not clear out an entire queue with multiple packets in one go. 
>>>>>>> But
>>>>>>> you
>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>> queue.
>>>>>>> 
>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>> round"
>>>>>>>>> (i.e.,
>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end). So if we
>>>>>>>>> just
>>>>>>>>> walk
>>>>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>>>>> 
>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just 
>>>>>>>>> to
>>>>>>>>> have
>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>> 
>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is 
>>>>>>>>> currently
>>>>>>>>> being
>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>> schedule_pos
>>>>>>>>> to
>>>>>>>>> be the rb_next of the current value?
>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>> value,
>>>>>>>> then in the next_txq() where we are going to use rb_next again 
>>>>>>>> and
>>>>>>>> finally pick the next node of the node we really want. Is it 
>>>>>>>> fine
>>>>>>>> to
>>>>>>>> update schedule_pos to NULL?
>>>>>>> 
>>>>>>> Hmm, yeah, good point.
>>>>>>> 
>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>>> another
>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>> where
>>>>>>> things can oscillate back and forth between addition and removal
>>>>>>> resulting in an infinite loop? Not sure, but at least I can't 
>>>>>>> seem
>>>>>>> to
>>>>>>> convince myself that this can't happen.
>>>>>> 
>>>>>> As the loop of next_txq under lock protection as below,
>>>>>> 
>>>>>> txq_schedule_start();
>>>>>> while(txq=next_txq()){
>>>>>> ...
>>>>>> return_txq(txq);
>>>>>> }
>>>>>> txq_schedule_end();
>>>>>> 
>>>>>> I do not see any chance of addition, no?
>>>>> 
>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>> yeah,
>>>>> we need to rebase this series to also incorporate that. I figure I
>>>>> can
>>>>> send an updated version of the first patch in the series once we've
>>>>> worked out the remaining issues with your follow-up patches.
>>>>> 
>>>> Oh, I was thinking we were discussing without locking reduced. Yes, 
>>>> I
>>>> also agree there might be a case causing infinite loop. With locking
>>>> reduced, the tree can be adjusted between next_txq() and 
>>>> return_txq()
>>>> in
>>>> the loop situation. For further discussion, let's consider,
>>>> 1) the tree starts like:
>>>>         A->B->C->D->E
>>>> 2) then next_txq() returns A for dequeuing
>>>> 3) driver dequeues A and drains A without any active txq locked
>>>> meaning
>>>> the tree could be changed upon Tx completion.
>>>> 4) then in return_txq(), the tree could be,
>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>> 
>>>> with this change:
>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> 
>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>>> next_txq(), rb_next(B) is what we return, which actually is C, and B
>>>> is
>>>> skipped, no?
>>>> 
>>>> Similar for case ii, we skip B, C, D.
>>> 
>>> Yup, I think you're right. But we can fix this by making
>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., 
>>> if
>>> resort_txq() acts on the txq that's currently in schedule_pos, it 
>>> will
>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>> (optionally after checking that the position of the node is actually
>>> going to change).
>> Sorry, please ignore last email sent by mistake.
>> 
>> I don't think it makes any difference with that in unschedule_txq(). 
>> For
>> case i, it finally picks C as well in next_txq(). For next_txq(),
>> schedule_pos means previous candidate node whereas with your change, 
>> it
>> looks like schedule_pos is current candidate node instead.
> 
> Hmm, that was not actually what I was thinking, but yeah I think you're
> right that it would be easier to just change it so schedule_pos is
> pointing to the next and not the current txq we want to schedule.
So do you mean we can change next_txq like this,

  struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 
ac)
  {
  	struct ieee80211_local *local = hw_to_local(hw);
	struct rb_node *node = local->schedule_pos[ac];
  	struct txq_info *txqi = NULL;
	bool first = false;

  	lockdep_assert_held(&local->active_txq_lock[ac]);

	if (!node) {
	        node = rb_first_cached(&local->active_txqs[ac]);
		first = true;
-	} else
-		node = rb_next(node);
+       }
+
	if (!node)
  		return NULL;



> 
> We'd still need a check in resort_txq() then, but it would make it safe
> to unschedule in return_txq()...
Yes, agree with that.


> 
>>>> Also I am wondering if there will be some SMP issues relating with
>>>> local->schedule_pos[ac].
>>> 
>>> Not sure what you mean by this?
>> My bad. Please ignore this.
>> 
>> 
>>> 
>>>>>> In ath10k, we will usually push packets of first txq as many as we
>>>>>> can
>>>>>> until it is drained and then move to the next one. So if a txq 
>>>>>> gets
>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>> during this period, neither vt of any station or global vt can be
>>>>>> updated due to lock protection.
>>>>>> 
>>>>>>> 
>>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>>> either
>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>>> something like:
>>>>>>> 
>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>>> 
>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, 
>>>>> vs
>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>> scheduling round, while the latter ends the current round.
>>>>> 
>>>>> -Toke
>>>> 
>>>> --
>>>> Yibo
>> 
>> --
>> Yibo

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 13:24                           ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-21 13:24 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>> rbtree
>>>>>>>>>>>> gets
>>>>>>>>>>>> removed from rbtree immediately in the 
>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>> the
>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defer
>>>>>>>>>>>> the
>>>>>>>>>>>> removal until right before the end of this schedule round.
>>>>>>>>>>>> 
>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>> 
>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>> I'll
>>>>>>>>>>> add
>>>>>>>>>>> ack or review tags as appropriate in reply; but a few 
>>>>>>>>>>> comments
>>>>>>>>>>> first:
>>>>>>>>>>> 
>>>>>>>>>>>> ---
>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>> 
>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>> 
>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>> +
>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>> *rate,
>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>  {
>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>   *
>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, 
>>>>>>>>>>>> it
>>>>>>>>>>>> will
>>>>>>>>>>>> be
>>>>>>>>>>>> added
>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>>> eligible.
>>>>>>>>>>>> If a txq
>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>> after the
>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void 
>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>   *
>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>> Check
>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>   */
>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8
>>>>>>>>>>>> ac)
>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>> 
>>>>>>>>>>>>  /**
>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>> + *
>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>> + *
>>>>>>>>>>>> + */
>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>> +
>>>>>>>>>>>> +/**
>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is 
>>>>>>>>>>>> allowed
>>>>>>>>>>>> to
>>>>>>>>>>>> transmit
>>>>>>>>>>>>   *
>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>> allowed
>>>>>>>>>>>> to
>>>>>>>>>>>> transmit by
>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>> 
>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>> 
>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>> 
>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>> 
>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>  	}
>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>> 
>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 
>>>>>>>>>>>> 0);
>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>> +
>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw)
>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>> 
>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>  #endif
>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw,
>>>>>>>>>>>> 
>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> 
>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>> +
>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>  }
>>>>>>>>>>>> 
>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>  }
>>>>>>>>>>>> 
>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>> *hw,
>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>> 
>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>>  }
>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>> 
>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local,
>>>>>>>>>>>> int
>>>>>>>>>>>> ac)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +
>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, 
>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>>> +
>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>> +		else
>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +}
>>>>>>>>>>>> +
>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>> +	}
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>> +}
>>>>>>>>>>> 
>>>>>>>>>>> I'll ask the same as I did last time (where you told me to 
>>>>>>>>>>> hold
>>>>>>>>>>> off
>>>>>>>>>>> until this round):
>>>>>>>>>>> 
>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>>> added
>>>>>>>>>>> to
>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear 
>>>>>>>>>>> the
>>>>>>>>>>> list?
>>>>>>>>>> Is it possible that a txq is not added to the remove list but
>>>>>>>>>> then
>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>> disconnects
>>>>>>>>>> without any notification.
>>>>>>>>> 
>>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>> scheduler
>>>>>>>>> when
>>>>>>>>> they disconnect etc.
>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, 
>>>>>>>> say,
>>>>>>>> we
>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>> weight
>>>>>>>> compared with that of others. Suppose, after its chance of Tx, 
>>>>>>>> it
>>>>>>>> is
>>>>>>>> most likely to be placed in the rightmost(still has some packets
>>>>>>>> in
>>>>>>>> the
>>>>>>>> txq) and no more incoming data for it. The remaining packets in
>>>>>>>> txq
>>>>>>>> will
>>>>>>>> be dropped due to timeout algo in codel(correct me if I am 
>>>>>>>> wrong)
>>>>>>>> but
>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>> drained
>>>>>>>> or
>>>>>>>> global vt catch up with its vt. The staying time could be long 
>>>>>>>> if
>>>>>>>> weight
>>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>>> better
>>>>>>>> solution?
>>>>>>> 
>>>>>>> Ah, I see what you mean. No, I don't think this will be a 
>>>>>>> problem;
>>>>>>> the
>>>>>>> scenario you're describing would play out like this:
>>>>>>> 
>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>> gets
>>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>>> 
>>>>>>> 2. When we finally get to the point where this station gets 
>>>>>>> another
>>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>>> packet
>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>> 
>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not 
>>>>>>> put
>>>>>>> it
>>>>>>>    back on the rbtree.
>>>>>>> 
>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>> point
>>>>>>> of
>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree 
>>>>>>> for
>>>>>>> a
>>>>>>> while, there is no harm in that: eventually it will get its turn,
>>>>>>> it
>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>>> part
>>>>>> in CoDel algorithm.
>>>>> 
>>>>> Yup, think so. And you're welcome :)
>>>>> 
>>>>>>> The issue we need to be concerned about is the opposite: If we 
>>>>>>> have
>>>>>>> a
>>>>>>> queue that *does* have packets queued, but which is *not* 
>>>>>>> scheduled
>>>>>>> for
>>>>>>> transmission, that will stall TX.
>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>> latency
>>>>>> may somehow get impacted though.
>>>>> 
>>>>> Well, it should still stay on the rbtree as long as it has packets
>>>>> queued. We don't have a check anywhere that reschedules TXQs whose
>>>>> v_t
>>>>> drops below global v_t...
>>>>> 
>>>>>>> [0] CoDel in most cases only drops a single packet at a time, so 
>>>>>>> it
>>>>>>> will
>>>>>>> not clear out an entire queue with multiple packets in one go. 
>>>>>>> But
>>>>>>> you
>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>> queue.
>>>>>>> 
>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>> round"
>>>>>>>>> (i.e.,
>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end). So if we
>>>>>>>>> just
>>>>>>>>> walk
>>>>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>>>>> 
>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just 
>>>>>>>>> to
>>>>>>>>> have
>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>> 
>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is 
>>>>>>>>> currently
>>>>>>>>> being
>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>> schedule_pos
>>>>>>>>> to
>>>>>>>>> be the rb_next of the current value?
>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>> value,
>>>>>>>> then in the next_txq() where we are going to use rb_next again 
>>>>>>>> and
>>>>>>>> finally pick the next node of the node we really want. Is it 
>>>>>>>> fine
>>>>>>>> to
>>>>>>>> update schedule_pos to NULL?
>>>>>>> 
>>>>>>> Hmm, yeah, good point.
>>>>>>> 
>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>>> another
>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>> where
>>>>>>> things can oscillate back and forth between addition and removal
>>>>>>> resulting in an infinite loop? Not sure, but at least I can't 
>>>>>>> seem
>>>>>>> to
>>>>>>> convince myself that this can't happen.
>>>>>> 
>>>>>> As the loop of next_txq under lock protection as below,
>>>>>> 
>>>>>> txq_schedule_start();
>>>>>> while(txq=next_txq()){
>>>>>> ...
>>>>>> return_txq(txq);
>>>>>> }
>>>>>> txq_schedule_end();
>>>>>> 
>>>>>> I do not see any chance of addition, no?
>>>>> 
>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>> yeah,
>>>>> we need to rebase this series to also incorporate that. I figure I
>>>>> can
>>>>> send an updated version of the first patch in the series once we've
>>>>> worked out the remaining issues with your follow-up patches.
>>>>> 
>>>> Oh, I was thinking we were discussing without locking reduced. Yes, 
>>>> I
>>>> also agree there might be a case causing infinite loop. With locking
>>>> reduced, the tree can be adjusted between next_txq() and 
>>>> return_txq()
>>>> in
>>>> the loop situation. For further discussion, let's consider,
>>>> 1) the tree starts like:
>>>>         A->B->C->D->E
>>>> 2) then next_txq() returns A for dequeuing
>>>> 3) driver dequeues A and drains A without any active txq locked
>>>> meaning
>>>> the tree could be changed upon Tx completion.
>>>> 4) then in return_txq(), the tree could be,
>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>>> before the loop end)
>>>> 
>>>> with this change:
>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>> 
>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>>> next_txq(), rb_next(B) is what we return, which actually is C, and B
>>>> is
>>>> skipped, no?
>>>> 
>>>> Similar for case ii, we skip B, C, D.
>>> 
>>> Yup, I think you're right. But we can fix this by making
>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., 
>>> if
>>> resort_txq() acts on the txq that's currently in schedule_pos, it 
>>> will
>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>> (optionally after checking that the position of the node is actually
>>> going to change).
>> Sorry, please ignore last email sent by mistake.
>> 
>> I don't think it makes any difference with that in unschedule_txq(). 
>> For
>> case i, it finally picks C as well in next_txq(). For next_txq(),
>> schedule_pos means previous candidate node whereas with your change, 
>> it
>> looks like schedule_pos is current candidate node instead.
> 
> Hmm, that was not actually what I was thinking, but yeah I think you're
> right that it would be easier to just change it so schedule_pos is
> pointing to the next and not the current txq we want to schedule.
So do you mean we can change next_txq like this,

  struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 
ac)
  {
  	struct ieee80211_local *local = hw_to_local(hw);
	struct rb_node *node = local->schedule_pos[ac];
  	struct txq_info *txqi = NULL;
	bool first = false;

  	lockdep_assert_held(&local->active_txq_lock[ac]);

	if (!node) {
	        node = rb_first_cached(&local->active_txqs[ac]);
		first = true;
-	} else
-		node = rb_next(node);
+       }
+
	if (!node)
  		return NULL;



> 
> We'd still need a check in resort_txq() then, but it would make it safe
> to unschedule in return_txq()...
Yes, agree with that.


> 
>>>> Also I am wondering if there will be some SMP issues relating with
>>>> local->schedule_pos[ac].
>>> 
>>> Not sure what you mean by this?
>> My bad. Please ignore this.
>> 
>> 
>>> 
>>>>>> In ath10k, we will usually push packets of first txq as many as we
>>>>>> can
>>>>>> until it is drained and then move to the next one. So if a txq 
>>>>>> gets
>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>> during this period, neither vt of any station or global vt can be
>>>>>> updated due to lock protection.
>>>>>> 
>>>>>>> 
>>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>>> either
>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>>> something like:
>>>>>>> 
>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>>> 
>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, 
>>>>> vs
>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>> scheduling round, while the latter ends the current round.
>>>>> 
>>>>> -Toke
>>>> 
>>>> --
>>>> Yibo
>> 
>> --
>> Yibo

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 13:24                           ` Yibo Zhao
@ 2019-09-21 14:00                             ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 14:00 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>> gets
>>>>>>>>>>>>> removed from rbtree immediately in the 
>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>> the
>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, deferring
>>>>>>>>>>>>> the
>>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>> 
>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>> I'll
>>>>>>>>>>>> add
>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few 
>>>>>>>>>>>> comments
>>>>>>>>>>>> first:
>>>>>>>>>>>> 
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>> 
>>>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>  {
>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>   *
>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, 
>>>>>>>>>>>>> it
>>>>>>>>>>>>> will
>>>>>>>>>>>>> be
>>>>>>>>>>>>> added
>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>> after the
>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void 
>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>   *
>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>> Check
>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>   */
>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8
>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  /**
>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>> + *
>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>> + *
>>>>>>>>>>>>> + */
>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is 
>>>>>>>>>>>>> allowed
>>>>>>>>>>>>> to
>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>   *
>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>> allowed
>>>>>>>>>>>>> to
>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 
>>>>>>>>>>>>> 0);
>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>  }
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>  }
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local,
>>>>>>>>>>>>> int
>>>>>>>>>>>>> ac)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, 
>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>> +		else
>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>> +}
>>>>>>>>>>>> 
>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to 
>>>>>>>>>>>> hold
>>>>>>>>>>>> off
>>>>>>>>>>>> until this round):
>>>>>>>>>>>> 
>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>>>> added
>>>>>>>>>>>> to
>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear 
>>>>>>>>>>>> the
>>>>>>>>>>>> list?
>>>>>>>>>>> Is it possible that a txq is not added to the remove list but
>>>>>>>>>>> then
>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>> disconnects
>>>>>>>>>>> without any notification.
>>>>>>>>>> 
>>>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>> scheduler
>>>>>>>>>> when
>>>>>>>>>> they disconnect etc.
>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, 
>>>>>>>>> say,
>>>>>>>>> we
>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>> weight
>>>>>>>>> compared with that of others. Suppose, after its chance of Tx, 
>>>>>>>>> it
>>>>>>>>> is
>>>>>>>>> most likely to be placed in the rightmost(still has some packets
>>>>>>>>> in
>>>>>>>>> the
>>>>>>>>> txq) and no more incoming data for it. The remaining packets in
>>>>>>>>> txq
>>>>>>>>> will
>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am 
>>>>>>>>> wrong)
>>>>>>>>> but
>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>> drained
>>>>>>>>> or
>>>>>>>>> global vt catch up with its vt. The staying time could be long 
>>>>>>>>> if
>>>>>>>>> weight
>>>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>>>> better
>>>>>>>>> solution?
>>>>>>>> 
>>>>>>>> Ah, I see what you mean. No, I don't think this will be a 
>>>>>>>> problem;
>>>>>>>> the
>>>>>>>> scenario you're describing would play out like this:
>>>>>>>> 
>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>> gets
>>>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>>>> 
>>>>>>>> 2. When we finally get to the point where this station gets 
>>>>>>>> another
>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>>>> packet
>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>> 
>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not 
>>>>>>>> put
>>>>>>>> it
>>>>>>>>    back on the rbtree.
>>>>>>>> 
>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>> point
>>>>>>>> of
>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree 
>>>>>>>> for
>>>>>>>> a
>>>>>>>> while, there is no harm in that: eventually it will get its turn,
>>>>>>>> it
>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>>>> part
>>>>>>> in CoDel algorithm.
>>>>>> 
>>>>>> Yup, think so. And you're welcome :)
>>>>>> 
>>>>>>>> The issue we need to be concerned about is the opposite: If we 
>>>>>>>> have
>>>>>>>> a
>>>>>>>> queue that *does* have packets queued, but which is *not* 
>>>>>>>> scheduled
>>>>>>>> for
>>>>>>>> transmission, that will stall TX.
>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>> latency
>>>>>>> may somehow get impacted though.
>>>>>> 
>>>>>> Well, it should still stay on the rbtree as long as it has packets
>>>>>> queued. We don't have a check anywhere that reschedules TXQs whose
>>>>>> v_t
>>>>>> drops below global v_t...
>>>>>> 
>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, so 
>>>>>>>> it
>>>>>>>> will
>>>>>>>> not clear out an entire queue with multiple packets in one go. 
>>>>>>>> But
>>>>>>>> you
>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>> queue.
>>>>>>>> 
>>>>>>>>>> We only need to defer removal inside a single "scheduling 
>>>>>>>>>> round"
>>>>>>>>>> (i.e.,
>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we 
>>>>>>>>>> just
>>>>>>>>>> walk
>>>>>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>>>>>> 
>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just 
>>>>>>>>>> to
>>>>>>>>>> have
>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>> 
>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is 
>>>>>>>>>> currently
>>>>>>>>>> being
>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>> schedule_pos
>>>>>>>>>> to
>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>> value,
>>>>>>>>> then in the next_txq() where we are going to use rb_next again 
>>>>>>>>> and
>>>>>>>>> finally pick the next node of the node we really want. Is it 
>>>>>>>>> fine
>>>>>>>>> to
>>>>>>>>> update schedule_pos to NULL?
>>>>>>>> 
>>>>>>>> Hmm, yeah, good point.
>>>>>>>> 
>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>>>> another
>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>> where
>>>>>>>> things can oscillate back and forth between addition and removal
>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't 
>>>>>>>> seem
>>>>>>>> to
>>>>>>>> convince myself that this can't happen.
>>>>>>> 
>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>> 
>>>>>>> txq_schedule_start();
>>>>>>> while(txq=next_txq()){
>>>>>>> ...
>>>>>>> return_txq(txq);
>>>>>>> }
>>>>>>> txq_schedule_end();
>>>>>>> 
>>>>>>> I do not see any chance of addition, no?
>>>>>> 
>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>> yeah,
>>>>>> we need to rebase this series to also incorporate that. I figure I
>>>>>> can
>>>>>> send an updated version of the first patch in the series once we've
>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>> 
>>>>> Oh, I was thinking we were discussing without locking reduced. Yes, 
>>>>> I
>>>>> also agree there might be a case causing infinite loop. With locking
>>>>> reduced, the tree can be adjusted between next_txq() and 
>>>>> return_txq()
>>>>> in
>>>>> the loop situation. For further discussion, let's consider,
>>>>> 1) the tree starts like:
>>>>>         A->B->C->D->E
>>>>> 2) then next_txq() returns A for dequeuing
>>>>> 3) driver dequeues A and drains A without any active txq locked,
>>>>> meaning
>>>>> the tree could be changed upon Tx completion.
>>>>> 4) then in return_txq(), the tree could be,
>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>> 
>>>>> with this change:
>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>> 
>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>>>> next_txq(), rb_next(B) is what we returns which actually is C and B 
>>>>> is
>>>>> skipped, no?
>>>>> 
>>>>> Similarly for case ii, we skip B, C, D.
>>>> 
>>>> Yup, I think you're right. But we can fix this by making
>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., 
>>>> if
>>>> resort_txq() acts on the txq that's currently in schedule_pos, it 
>>>> will
>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>> (optionally after checking that the position of the node is actually
>>>> going to change).
>>> Sorry, please ignore the last email, sent by mistake.
>>> 
>>> I don't think it makes any difference with that in unschedule_txq(). 
>>> For
>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>> schedule_pos means previous candidate node whereas with your change, 
>>> it
>>> looks like schedule_pos is current candidate node instead.
>> 
>> Hmm, that was not actually what I was thinking, but yeah I think you're
>> right that it would be easier to just change it so schedule_pos is
>> pointing to the next and not the current txq we want to schedule.
> So do you mean we can change next_txq like this,
>
>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 
> ac)
>   {
>   	struct ieee80211_local *local = hw_to_local(hw);
> 	struct rb_node *node = local->schedule_pos[ac];
>   	struct txq_info *txqi = NULL;
> 	bool first = false;
>
>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>
> 	if (!node) {
> 	        node = rb_first_cached(&local->active_txqs[ac]);
> 		first = true;
> -	} else
> -		node = rb_next(node);
> +       }
> +
> 	if (!node)
>   		return NULL;

Ah, no, now I remember why this didn't work and I went with the other
approach: If you make this change, you also have to have this at the
end:

local->schedule_pos[ac] = rb_next(node);


But this means we can no longer distinguish between having gone through
the whole thing (so rb_next() returns NULL), or starting out with
nothing.

So, instead we need to keep next_txq() the way it is, and just add

local->schedule_pos[ac] = rb_prev(node);

whenever we remove a node (both in return_txq() and resort_txq()).

>> 
>> We'd still need a check in resort_txq() then, but it would make it safe
>> to unschedule in return_txq()...
> Yes, agree with that.
>
>
>> 
>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>> local->schedule_pos[ac].
>>>> 
>>>> Not sure what you mean by this?
>>> My bad. Please ignore this.
>>> 
>>> 
>>>> 
>>>>>>> In ath10k, we will usually push packets of first txq as many as we
>>>>>>> can
>>>>>>> until it is drained and then move to the next one. So if a txq 
>>>>>>> gets
>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>> during this period, neither vt of any station or global vt can be
>>>>>>> updated due to lock protection.
>>>>>>> 
>>>>>>>> 
>>>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>>>> either
>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>>>> something like:
>>>>>>>> 
>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>>>> 
>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, 
>>>>>> vs
>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>> scheduling round, while the latter ends the current round.
>>>>>> 
>>>>>> -Toke
>>>>> 
>>>>> --
>>>>> Yibo
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-21 14:00                             ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-21 14:00 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>> gets
>>>>>>>>>>>>> removed from rbtree immediately in the 
>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>> the
>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, deferring
>>>>>>>>>>>>> the
>>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>> 
>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>> I'll
>>>>>>>>>>>> add
>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few 
>>>>>>>>>>>> comments
>>>>>>>>>>>> first:
>>>>>>>>>>>> 
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>> 
>>>>>>>>>>>>> diff --git a/include/net/mac80211.h b/include/net/mac80211.h
>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>  {
>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>   *
>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty, 
>>>>>>>>>>>>> it
>>>>>>>>>>>>> will
>>>>>>>>>>>>> be
>>>>>>>>>>>>> added
>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue is
>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>> after the
>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void 
>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>   *
>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>> Check
>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>   */
>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8
>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  /**
>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>> + *
>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>> + *
>>>>>>>>>>>>> + */
>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is 
>>>>>>>>>>>>> allowed
>>>>>>>>>>>>> to
>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>   *
>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>> allowed
>>>>>>>>>>>>> to
>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check, 
>>>>>>>>>>>>> 0);
>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>  }
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>  }
>>>>>>>>>>>>> 
>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>>  }
>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>> 
>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local *local,
>>>>>>>>>>>>> int
>>>>>>>>>>>>> ac)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp, 
>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, sta);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>> +		else
>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>> +	}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>> +}
>>>>>>>>>>>> 
>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to 
>>>>>>>>>>>> hold
>>>>>>>>>>>> off
>>>>>>>>>>>> until this round):
>>>>>>>>>>>> 
>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs are
>>>>>>>>>>>> added
>>>>>>>>>>>> to
>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear 
>>>>>>>>>>>> the
>>>>>>>>>>>> list?
>>>>>>>>>>> Is it possible that a txq is not added to the remove list but
>>>>>>>>>>> then
>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>> disconnects
>>>>>>>>>>> without any notification.
>>>>>>>>>> 
>>>>>>>>>> Well as long as all the other cleanup paths call directly into
>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>> scheduler
>>>>>>>>>> when
>>>>>>>>>> they disconnect etc.
>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is, 
>>>>>>>>> say,
>>>>>>>>> we
>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>> weight
>>>>>>>>> compared with that of others. Suppose, after its chance of Tx, 
>>>>>>>>> it
>>>>>>>>> is
>>>>>>>>> most likely to be placed in the rightmost(still has some packets
>>>>>>>>> in
>>>>>>>>> the
>>>>>>>>> txq) and no more incoming data for it. The remaining packets in
>>>>>>>>> txq
>>>>>>>>> will
>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am 
>>>>>>>>> wrong)
>>>>>>>>> but
>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>> drained
>>>>>>>>> or
>>>>>>>>> global vt catch up with its vt. The staying time could be long 
>>>>>>>>> if
>>>>>>>>> weight
>>>>>>>>> is extremely small. Then do we need timer to check or any other
>>>>>>>>> better
>>>>>>>>> solution?
>>>>>>>> 
>>>>>>>> Ah, I see what you mean. No, I don't think this will be a 
>>>>>>>> problem;
>>>>>>>> the
>>>>>>>> scenario you're describing would play out like this:
>>>>>>>> 
>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>> gets
>>>>>>>>    moved to the end of the rbtree (and stays there for a while).
>>>>>>>> 
>>>>>>>> 2. When we finally get to the point where this station gets 
>>>>>>>> another
>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the last
>>>>>>>> packet
>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>> 
>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not 
>>>>>>>> put
>>>>>>>> it
>>>>>>>>    back on the rbtree.
>>>>>>>> 
>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>> point
>>>>>>>> of
>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree 
>>>>>>>> for
>>>>>>>> a
>>>>>>>> while, there is no harm in that: eventually it will get its turn,
>>>>>>>> it
>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>> Then that will be fine. Thanks for the explanation of the dropping
>>>>>>> part
>>>>>>> in CoDel algorithm.
>>>>>> 
>>>>>> Yup, think so. And you're welcome :)
>>>>>> 
>>>>>>>> The issue we need to be concerned about is the opposite: If we 
>>>>>>>> have
>>>>>>>> a
>>>>>>>> queue that *does* have packets queued, but which is *not* 
>>>>>>>> scheduled
>>>>>>>> for
>>>>>>>> transmission, that will stall TX.
>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>> latency
>>>>>>> may somehow get impacted though.
>>>>>> 
>>>>>> Well, it should still stay on the rbtree as long as it has packets
>>>>>> queued. We don't have a check anywhere that reschedules TXQs whose
>>>>>> v_t
>>>>>> drops below global v_t...
>>>>>> 
>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, so 
>>>>>>>> it
>>>>>>>> will
>>>>>>>> not clear out an entire queue with multiple packets in one go. 
>>>>>>>> But
>>>>>>>> you
>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>> queue.
>>>>>>>> 
>>>>>>>>>> We only need to defer removal inside a single "scheduling 
>>>>>>>>>> round"
>>>>>>>>>> (i.e.,
>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we 
>>>>>>>>>> just
>>>>>>>>>> walk
>>>>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>>>>> 
>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just 
>>>>>>>>>> to
>>>>>>>>>> have
>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>> 
>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is 
>>>>>>>>>> currently
>>>>>>>>>> being
>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>> schedule_pos
>>>>>>>>>> to
>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>> value,
>>>>>>>>> then in the next_txq() where we are going to use rb_next again 
>>>>>>>>> and
>>>>>>>>> finally pick the next node of the node we really want. Is it 
>>>>>>>>> fine
>>>>>>>>> to
>>>>>>>>> update schedule_pos to NULL?
>>>>>>>> 
>>>>>>>> Hmm, yeah, good point.
>>>>>>>> 
>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>> scheduling round, that will make next_txq() "start over", and do
>>>>>>>> another
>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>> where
>>>>>>>> things can oscillate back and forth between addition and removal
>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't 
>>>>>>>> seem
>>>>>>>> to
>>>>>>>> convince myself that this can't happen.
>>>>>>> 
>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>> 
>>>>>>> txq_schedule_start();
>>>>>>> while(txq=next_txq()){
>>>>>>> ...
>>>>>>> return_txq(txq);
>>>>>>> }
>>>>>>> txq_schedule_end();
>>>>>>> 
>>>>>>> I do not see any chance of addition, no?
>>>>>> 
>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>> yeah,
>>>>>> we need to rebase this series to also incorporate that. I figure I
>>>>>> can
>>>>>> send an updated version of the first patch in the series once we've
>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>> 
>>>>> Oh, I was thinking we were discussing without locking reduced. Yes, 
>>>>> I
>>>>> also agree there might be a case causing infinite loop. With locking
>>>>> reduced, the tree can be adjusted between next_txq() and 
>>>>> return_txq()
>>>>> in
>>>>> the loop situation. For further discussion, let's consider,
>>>>> 1) the tree starts like:
>>>>>         A->B->C->D->E
>>>>> 2) then next_txq() returns A for dequeuing
>>>>> 3) driver dequeues A and drains A without any active txq locked
>>>>> meaning
>>>>> the tree could be changed upon Tx completion.
>>>>> 4) then in return_txq(), the tree could be,
>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added back
>>>>> before the loop end)
>>>>> 
>>>>> with this change:
>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>> 
>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and in
>>>>> next_txq(), rb_next(B) is what we returns which actually is C and B 
>>>>> is
>>>>> skipped, no?
>>>>> 
>>>>> Similar for case ii, we skip B, C, D.
>>>> 
>>>> Yup, I think you're right. But we can fix this by making
>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e., 
>>>> if
>>>> resort_txq() acts on the txq that's currently in schedule_pos, it 
>>>> will
>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>> (optionally after checking that the position of the node is actually
>>>> going to change).
>>> Sorry, please ignore the last email; it was sent by mistake.
>>> 
>>> I don't think it makes any difference with that in unschedule_txq(). 
>>> For
>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>> schedule_pos means previous candidate node whereas with your change, 
>>> it
>>> looks like schedule_pos is current candidate node instead.
>> 
>> Hmm, that was not actually what I was thinking, but yeah I think you're
>> right that it would be easier to just change it so schedule_pos is
>> pointing to the next and not the current txq we want to schedule.
> So do you mean we can change next_txq like this,
>
>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 
> ac)
>   {
>   	struct ieee80211_local *local = hw_to_local(hw);
> 	struct rb_node *node = local->schedule_pos[ac];
>   	struct txq_info *txqi = NULL;
> 	bool first = false;
>
>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>
> 	if (!node) {
> 	        node = rb_first_cached(&local->active_txqs[ac]);
> 		first = true;
> -	} else
> -		node = rb_next(node);
> +       }
> +
> 	if (!node)
>   		return NULL;

Ah, no, now I remember why this didn't work and I went with the other
approach: If you make this change, you also have to have this at the
end:

local->schedule_pos[ac] = rb_next(node);


But this means we can no longer distinguish between having gone through
the whole thing (so rb_next() returns NULL), or starting out with
nothing.

So, instead we need to keep next_txq() the way it is, and just add

local->schedule_pos[ac] = rb_prev(node);

whenever we remove a node (both in return_txq() and resort_txq()).

>> 
>> We'd still need a check in resort_txq() then, but it would make it safe
>> to unschedule in return_txq()...
> Yes, agree with that.
>
>
>> 
>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>> local->schedule_pos[ac].
>>>> 
>>>> Not sure what you mean by this?
>>> My bad. Please ignore this.
>>> 
>>> 
>>>> 
>>>>>>> In ath10k, we will usually push packets of first txq as many as we
>>>>>>> can
>>>>>>> until it is drained and then move to the next one. So if a txq 
>>>>>>> gets
>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>> during this period, neither vt of any station or global vt can be
>>>>>>> updated due to lock protection.
>>>>>>> 
>>>>>>>> 
>>>>>>>> But in that case, we could fix it by just conditionally assigning
>>>>>>>> either
>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? I.e.,
>>>>>>>> something like:
>>>>>>>> 
>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>> schedule_pos[ac] will lead us to the next node of the one we want.
>>>>>> 
>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL, 
>>>>>> vs
>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>> scheduling round, while the latter ends the current round.
>>>>>> 
>>>>>> -Toke
>>>>> 
>>>>> --
>>>>> Yibo
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-21 14:00                             ` Toke Høiland-Jørgensen
@ 2019-09-22  5:19                               ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-22  5:19 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>> removed from rbtree immediately in the
>>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, 
>>>>>>>>>>>>>> deferring
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>>> I'll
>>>>>>>>>>>>> add
>>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few
>>>>>>>>>>>>> comments
>>>>>>>>>>>>> first:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> diff --git a/include/net/mac80211.h 
>>>>>>>>>>>>>> b/include/net/mac80211.h
>>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>>  {
>>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty,
>>>>>>>>>>>>>> it
>>>>>>>>>>>>>> will
>>>>>>>>>>>>>> be
>>>>>>>>>>>>>> added
>>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue 
>>>>>>>>>>>>>> is
>>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>>> after the
>>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void
>>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> Check
>>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>>   */
>>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, 
>>>>>>>>>>>>>> u8
>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  /**
>>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>> + */
>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is
>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check,
>>>>>>>>>>>>>> 0);
>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local 
>>>>>>>>>>>>>> *local,
>>>>>>>>>>>>>> int
>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp,
>>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, 
>>>>>>>>>>>>>> sta);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>>> +		else
>>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to
>>>>>>>>>>>>> hold
>>>>>>>>>>>>> off
>>>>>>>>>>>>> until this round):
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs 
>>>>>>>>>>>>> are
>>>>>>>>>>>>> added
>>>>>>>>>>>>> to
>>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear
>>>>>>>>>>>>> the
>>>>>>>>>>>>> list?
>>>>>>>>>>>> Is it possible that a txq is not added to the remove list 
>>>>>>>>>>>> but
>>>>>>>>>>>> then
>>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>>> disconnects
>>>>>>>>>>>> without any notification.
>>>>>>>>>>> 
>>>>>>>>>>> Well as long as all the other cleanup paths call directly 
>>>>>>>>>>> into
>>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>>> scheduler
>>>>>>>>>>> when
>>>>>>>>>>> they disconnect etc.
>>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is,
>>>>>>>>>> say,
>>>>>>>>>> we
>>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>>> weight
>>>>>>>>>> compared with that of others. Suppose, after its chance of Tx,
>>>>>>>>>> it
>>>>>>>>>> is
>>>>>>>>>> most likely to be placed in the rightmost(still has some 
>>>>>>>>>> packets
>>>>>>>>>> in
>>>>>>>>>> the
>>>>>>>>>> txq) and no more incoming data for it. The remaining packets 
>>>>>>>>>> in
>>>>>>>>>> txq
>>>>>>>>>> will
>>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am
>>>>>>>>>> wrong)
>>>>>>>>>> but
>>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>>> drained
>>>>>>>>>> or
>>>>>>>>>> global vt catch up with its vt. The staying time could be long
>>>>>>>>>> if
>>>>>>>>>> weight
>>>>>>>>>> is extremely small. Then do we need timer to check or any 
>>>>>>>>>> other
>>>>>>>>>> better
>>>>>>>>>> solution?
>>>>>>>>> 
>>>>>>>>> Ah, I see what you mean. No, I don't think this will be a
>>>>>>>>> problem;
>>>>>>>>> the
>>>>>>>>> scenario you're describing would play out like this:
>>>>>>>>> 
>>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>>> gets
>>>>>>>>>    moved to the end of the rbtree (and stays there for a 
>>>>>>>>> while).
>>>>>>>>> 
>>>>>>>>> 2. When we finally get to the point where this station gets
>>>>>>>>> another
>>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the 
>>>>>>>>> last
>>>>>>>>> packet
>>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>>> 
>>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not
>>>>>>>>> put
>>>>>>>>> it
>>>>>>>>>    back on the rbtree.
>>>>>>>>> 
>>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>>> point
>>>>>>>>> of
>>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree
>>>>>>>>> for
>>>>>>>>> a
>>>>>>>>> while, there is no harm in that: eventually it will get its 
>>>>>>>>> turn,
>>>>>>>>> it
>>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>>> Then that will be fine. Thanks for the explanation of the 
>>>>>>>> dropping
>>>>>>>> part
>>>>>>>> in CoDel algorithm.
>>>>>>> 
>>>>>>> Yup, think so. And you're welcome :)
>>>>>>> 
>>>>>>>>> The issue we need to be concerned about is the opposite: If we
>>>>>>>>> have
>>>>>>>>> a
>>>>>>>>> queue that *does* have packets queued, but which is *not*
>>>>>>>>> scheduled
>>>>>>>>> for
>>>>>>>>> transmission, that will stall TX.
>>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>>> latency
>>>>>>>> may somehow get impacted though.
>>>>>>> 
>>>>>>> Well, it should still stay on the rbtree as long as it has 
>>>>>>> packets
>>>>>>> queued. We don't have a check anywhere that reschedules TXQs 
>>>>>>> whose
>>>>>>> v_t
>>>>>>> drops below global v_t...
>>>>>>> 
>>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, 
>>>>>>>>> so
>>>>>>>>> it
>>>>>>>>> will
>>>>>>>>> not clear out an entire queue with multiple packets in one go.
>>>>>>>>> But
>>>>>>>>> you
>>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>>> queue.
>>>>>>>>> 
>>>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>>>> round"
>>>>>>>>>>> (i.e.,
>>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we
>>>>>>>>>>> just
>>>>>>>>>>> walk
>>>>>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>>>>>> 
>>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just
>>>>>>>>>>> to
>>>>>>>>>>> have
>>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>>> 
>>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is
>>>>>>>>>>> currently
>>>>>>>>>>> being
>>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>>> schedule_pos
>>>>>>>>>>> to
>>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>>> value,
>>>>>>>>>> then in the next_txq() where we are going to use rb_next again
>>>>>>>>>> and
>>>>>>>>>> finally pick the next node of the node we really want. Is it
>>>>>>>>>> fine
>>>>>>>>>> to
>>>>>>>>>> update schedule_pos to NULL?
>>>>>>>>> 
>>>>>>>>> Hmm, yeah, good point.
>>>>>>>>> 
>>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>>> scheduling round, that will make next_txq() "start over", and 
>>>>>>>>> do
>>>>>>>>> another
>>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>>> where
>>>>>>>>> things can oscillate back and forth between addition and 
>>>>>>>>> removal
>>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't
>>>>>>>>> seem
>>>>>>>>> to
>>>>>>>>> convince myself that this can't happen.
>>>>>>>> 
>>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>>> 
>>>>>>>> txq_schedule_start();
>>>>>>>> while(txq=next_txq()){
>>>>>>>> ...
>>>>>>>> return_txq(txq);
>>>>>>>> }
>>>>>>>> txq_schedule_end();
>>>>>>>> 
>>>>>>>> I do not see any chance of addition, no?
>>>>>>> 
>>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>>> yeah,
>>>>>>> we need to rebase this series to also incorporate that. I figure 
>>>>>>> I
>>>>>>> can
>>>>>>> send an updated version of the first patch in the series once 
>>>>>>> we've
>>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>>> 
>>>>>> Oh, I was thinking we were discussing without locking reduced. 
>>>>>> Yes,
>>>>>> I
>>>>>> also agree there might be a case causing infinite loop. With 
>>>>>> locking
>>>>>> reduced, the tree can be adjusted between next_txq() and
>>>>>> return_txq()
>>>>>> in
>>>>>> the loop situation. For further discussion, let's consider,
>>>>>> 1) the tree starts like:
>>>>>>         A->B->C->D->E
>>>>>> 2) then next_txq() returns A for dequeuing
>>>>>> 3) driver dequeues A and drains A without any active txq locked
>>>>>> meaning
>>>>>> the tree could be changed upon Tx completion.
>>>>>> 4) then in return_txq(), the tree could be,
>>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>> 
>>>>>> with this change:
>>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>> 
>>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and 
>>>>>> in
>>>>>> next_txq(), rb_next(B) is what we returns which actually is C and 
>>>>>> B
>>>>>> is
>>>>>> skipped, no?
>>>>>> 
>>>>>> Similar for case ii, we skip B, C, D.
>>>>> 
>>>>> Yup, I think you're right. But we can fix this by making
>>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e.,
>>>>> if
>>>>> resort_txq() acts on the txq that's currently in schedule_pos, it
>>>>> will
>>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>>> (optionally after checking that the position of the node is 
>>>>> actually
>>>>> going to change).
>>>> Sorry, please ignore the last email sent by mistake.
>>>> 
>>>> I don't think it makes any difference with that in unschedule_txq().
>>>> For
>>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>>> schedule_pos means previous candidate node whereas with your change,
>>>> it
>>>> looks like schedule_pos is current candidate node instead.
>>> 
>>> Hmm, that was not actually what I was thinking, but yeah I think 
>>> you're
>>> right that it would be easier to just change it so schedule_pos is
>>> pointing to the next and not the current txq we want to schedule.
>> So do you mean we can change next_txq like this,
>> 
>>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8
>> ac)
>>   {
>>   	struct ieee80211_local *local = hw_to_local(hw);
>> 	struct rb_node *node = local->schedule_pos[ac];
>>   	struct txq_info *txqi = NULL;
>> 	bool first = false;
>> 
>>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>> 
>> 	if (!node) {
>> 	        node = rb_first_cached(&local->active_txqs[ac]);
>> 		first = true;
>> -	} else
>> -		node = rb_next(node);
>> +       }
>> +
>> 	if (!node)
>>   		return NULL;
> 
> Ah, no, now I remember why this didn't work and I went with the other
> approach: If you make this change, you also have to have this at the
> end:
> 
> local->schedule_pos[ac] = rb_next(node);
> 
> 
> But this means we can no longer distinguish between having gone through
> the whole thing (so rb_next() returns NULL), or starting out with
> nothing.
> 
> So, instead we need to keep next_txq() the way it is, and just add

Right, should keep next_txq() the way it is.

> 
> local->schedule_pos[ac] = rb_prev(node);
> 
> whenever we remove a node (both in return_txq() and resort_txq()).

Agree, and also we may need to consider case like A is removed and soon 
be added back just the same as ii),
        B->C->A->D->E
then B is scheduled, removed and soon added back,
        C->A->B->D->E
A and B will have a second chance to be scheduled and this may happen to 
others as well leading to the infinite loop as you have mentioned 
previously, so do we need to maintain a schedule_round like we do in 
DRR? Like,
     - If the node is in the same round, bypass scheduling, go to 
rb_next(), either continue loop this round or end this round.
     - Increase the schedule_round at the schedule_start() only when the 
schedule_pos is NULL.

> 
>>> 
>>> We'd still need a check in resort_txq() then, but it would make it 
>>> safe
>>> to unschedule in return_txq()...
>> Yes, agree with that.
>> 
>> 
>>> 
>>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>>> local->schedule_pos[ac].
>>>>> 
>>>>> Not sure what you mean by this?
>>>> My bad. Please ignore this.
>>>> 
>>>> 
>>>>> 
>>>>>>>> In ath10k, we will usually push packets of first txq as many as 
>>>>>>>> we
>>>>>>>> can
>>>>>>>> until it is drained and then move to the next one. So if a txq
>>>>>>>> gets
>>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>>> during this period, neither vt of any station or global vt can 
>>>>>>>> be
>>>>>>>> updated due to lock protection.
>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> But in that case, we could fix it by just conditionally 
>>>>>>>>> assigning
>>>>>>>>> either
>>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? 
>>>>>>>>> I.e.,
>>>>>>>>> something like:
>>>>>>>>> 
>>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>>> schedule_pos[ac] will lead us to the next node of the one we 
>>>>>>>> want.
>>>>>>> 
>>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL,
>>>>>>> vs
>>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>>> scheduling round, while the latter ends the current round.
>>>>>>> 
>>>>>>> -Toke
>>>>>> 
>>>>>> --
>>>>>> Yibo
>>>> 
>>>> --
>>>> Yibo
>> 
>> --
>> Yibo

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-22  5:19                               ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-22  5:19 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
> 
>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>> removed from rbtree immediately in the
>>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, 
>>>>>>>>>>>>>> deferring
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>>> I'll
>>>>>>>>>>>>> add
>>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few
>>>>>>>>>>>>> comments
>>>>>>>>>>>>> first:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> diff --git a/include/net/mac80211.h 
>>>>>>>>>>>>>> b/include/net/mac80211.h
>>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>>  {
>>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty,
>>>>>>>>>>>>>> it
>>>>>>>>>>>>>> will
>>>>>>>>>>>>>> be
>>>>>>>>>>>>>> added
>>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue 
>>>>>>>>>>>>>> is
>>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>>> after the
>>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void
>>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>> Check
>>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>>   */
>>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, 
>>>>>>>>>>>>>> u8
>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  /**
>>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>> + */
>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is
>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check,
>>>>>>>>>>>>>> 0);
>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local 
>>>>>>>>>>>>>> *local,
>>>>>>>>>>>>>> int
>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp,
>>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, 
>>>>>>>>>>>>>> sta);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>>> +		else
>>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>> +}
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to
>>>>>>>>>>>>> hold
>>>>>>>>>>>>> off
>>>>>>>>>>>>> until this round):
>>>>>>>>>>>>> 
>>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs 
>>>>>>>>>>>>> are
>>>>>>>>>>>>> added
>>>>>>>>>>>>> to
>>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear
>>>>>>>>>>>>> the
>>>>>>>>>>>>> list?
>>>>>>>>>>>> Is it possible that a txq is not added to the remove list 
>>>>>>>>>>>> but
>>>>>>>>>>>> then
>>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>>> disconnects
>>>>>>>>>>>> without any notification.
>>>>>>>>>>> 
>>>>>>>>>>> Well as long as all the other cleanup paths call directly 
>>>>>>>>>>> into
>>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>>> scheduler
>>>>>>>>>>> when
>>>>>>>>>>> they disconnect etc.
>>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is,
>>>>>>>>>> say,
>>>>>>>>>> we
>>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>>> weight
>>>>>>>>>> compared with that of others. Suppose, after its chance of Tx,
>>>>>>>>>> it
>>>>>>>>>> is
>>>>>>>>>> most likely to be placed in the rightmost(still has some 
>>>>>>>>>> packets
>>>>>>>>>> in
>>>>>>>>>> the
>>>>>>>>>> txq) and no more incoming data for it. The remaining packets 
>>>>>>>>>> in
>>>>>>>>>> txq
>>>>>>>>>> will
>>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am
>>>>>>>>>> wrong)
>>>>>>>>>> but
>>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>>> drained
>>>>>>>>>> or
>>>>>>>>>> global vt catch up with its vt. The staying time could be long
>>>>>>>>>> if
>>>>>>>>>> weight
>>>>>>>>>> is extremely small. Then do we need timer to check or any 
>>>>>>>>>> other
>>>>>>>>>> better
>>>>>>>>>> solution?
>>>>>>>>> 
>>>>>>>>> Ah, I see what you mean. No, I don't think this will be a
>>>>>>>>> problem;
>>>>>>>>> the
>>>>>>>>> scenario you're describing would play out like this:
>>>>>>>>> 
>>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>>> gets
>>>>>>>>>    moved to the end of the rbtree (and stays there for a 
>>>>>>>>> while).
>>>>>>>>> 
>>>>>>>>> 2. When we finally get to the point where this station gets
>>>>>>>>> another
>>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the 
>>>>>>>>> last
>>>>>>>>> packet
>>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>>> 
>>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not
>>>>>>>>> put
>>>>>>>>> it
>>>>>>>>>    back on the rbtree.
>>>>>>>>> 
>>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>>> point
>>>>>>>>> of
>>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree
>>>>>>>>> for
>>>>>>>>> a
>>>>>>>>> while, there is no harm in that: eventually it will get its 
>>>>>>>>> turn,
>>>>>>>>> it
>>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>>> Then that will be fine. Thanks for the explanation of the 
>>>>>>>> dropping
>>>>>>>> part
>>>>>>>> in CoDel algorithm.
>>>>>>> 
>>>>>>> Yup, think so. And you're welcome :)
>>>>>>> 
>>>>>>>>> The issue we need to be concerned about is the opposite: If we
>>>>>>>>> have
>>>>>>>>> a
>>>>>>>>> queue that *does* have packets queued, but which is *not*
>>>>>>>>> scheduled
>>>>>>>>> for
>>>>>>>>> transmission, that will stall TX.
>>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>>> latency
>>>>>>>> may somehow get impacted though.
>>>>>>> 
>>>>>>> Well, it should still stay on the rbtree as long as it has 
>>>>>>> packets
>>>>>>> queued. We don't have a check anywhere that reschedules TXQs 
>>>>>>> whose
>>>>>>> v_t
>>>>>>> drops below global v_t...
>>>>>>> 
>>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, 
>>>>>>>>> so
>>>>>>>>> it
>>>>>>>>> will
>>>>>>>>> not clear out an entire queue with multiple packets in one go.
>>>>>>>>> But
>>>>>>>>> you
>>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>>> queue.
>>>>>>>>> 
>>>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>>>> round"
>>>>>>>>>>> (i.e.,
>>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we
>>>>>>>>>>> just
>>>>>>>>>>> walk
>>>>>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>>>>>> 
>>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just
>>>>>>>>>>> to
>>>>>>>>>>> have
>>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>>> 
>>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is
>>>>>>>>>>> currently
>>>>>>>>>>> being
>>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>>> schedule_pos
>>>>>>>>>>> to
>>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>>> value,
>>>>>>>>>> then in the next_txq() where we are going to use rb_next again
>>>>>>>>>> and
>>>>>>>>>> finally pick the next node of the node we really want. Is it
>>>>>>>>>> fine
>>>>>>>>>> to
>>>>>>>>>> update schedule_pos to NULL?
>>>>>>>>> 
>>>>>>>>> Hmm, yeah, good point.
>>>>>>>>> 
>>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>>> scheduling round, that will make next_txq() "start over", and 
>>>>>>>>> do
>>>>>>>>> another
>>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>>> where
>>>>>>>>> things can oscillate back and forth between addition and 
>>>>>>>>> removal
>>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't
>>>>>>>>> seem
>>>>>>>>> to
>>>>>>>>> convince myself that this can't happen.
>>>>>>>> 
>>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>>> 
>>>>>>>> txq_schedule_start();
>>>>>>>> while(txq=next_txq()){
>>>>>>>> ...
>>>>>>>> return_txq(txq);
>>>>>>>> }
>>>>>>>> txq_schedule_end();
>>>>>>>> 
>>>>>>>> I do not see any chance of addition, no?
>>>>>>> 
>>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>>> yeah,
>>>>>>> we need to rebase this series to also incorporate that. I figure 
>>>>>>> I
>>>>>>> can
>>>>>>> send an updated version of the first patch in the series once 
>>>>>>> we've
>>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>>> 
>>>>>> Oh, I was thinking we were discussing without locking reduced. 
>>>>>> Yes,
>>>>>> I
>>>>>> also agree there might be a case causing infinite loop. With 
>>>>>> locking
>>>>>> reduced, the tree can be adjusted between next_txq() and
>>>>>> return_txq()
>>>>>> in
>>>>>> the loop situation. For further discussion, let's consider,
>>>>>> 1) the tree starts like:
>>>>>>         A->B->C->D->E
>>>>>> 2) then next_txq() returns A for dequeuing
>>>>>> 3) driver dequeues A and drains A without any active txq locked
>>>>>> meaning
>>>>>> the tree could be changed upon Tx completion.
>>>>>> 4) then in return_txq(), the tree could be,
>>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added 
>>>>>> back
>>>>>> before the loop end)
>>>>>> 
>>>>>> with this change:
>>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>> 
>>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and 
>>>>>> in
>>>>>> next_txq(), rb_next(B) is what we returns which actually is C and 
>>>>>> B
>>>>>> is
>>>>>> skipped, no?
>>>>>> 
>>>>>> Similar for case ii, we skip B, C, D.
>>>>> 
>>>>> Yup, I think you're right. But we can fix this by making
>>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e.,
>>>>> if
>>>>> resort_txq() acts on the txq that's currently in schedule_pos, it
>>>>> will
>>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>>> (optionally after checking that the position of the node is 
>>>>> actually
>>>>> going to change).
>>>> Sorry, please ignore the last email sent by mistake.
>>>> 
>>>> I don't think it makes any difference with that in unschedule_txq().
>>>> For
>>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>>> schedule_pos means previous candidate node whereas with your change,
>>>> it
>>>> looks like schedule_pos is current candidate node instead.
>>> 
>>> Hmm, that was not actually what I was thinking, but yeah I think 
>>> you're
>>> right that it would be easier to just change it so schedule_pos is
>>> pointing to the next and not the current txq we want to schedule.
>> So do you mean we can change next_txq like this,
>> 
>>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8
>> ac)
>>   {
>>   	struct ieee80211_local *local = hw_to_local(hw);
>> 	struct rb_node *node = local->schedule_pos[ac];
>>   	struct txq_info *txqi = NULL;
>> 	bool first = false;
>> 
>>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>> 
>> 	if (!node) {
>> 	        node = rb_first_cached(&local->active_txqs[ac]);
>> 		first = true;
>> -	} else
>> -		node = rb_next(node);
>> +       }
>> +
>> 	if (!node)
>>   		return NULL;
> 
> Ah, no, now I remember why this didn't work and I went with the other
> approach: If you make this change, you also have to have this at the
> end:
> 
> local->schedule_pos[ac] = rb_next(node);
> 
> 
> But this means we can no longer distinguish between having gone through
> the whole thing (so rb_next() returns NULL), or starting out with
> nothing.
> 
> So, instead we need to keep next_txq() the way it is, and just add

Right, should keep next_txq() the way it is.

> 
> local->schedule_pos[ac] = rb_prev(node);
> 
> whenever we remove a node (both in return_txq() and resort_txq()).

Agree, and also we may need to consider case like A is removed and soon 
be added back just the same as ii),
        B->C->A->D->E
then B is scheduled, removed and soon added back,
        C->A->B->D->E
A and B will have a second chance to be scheduled and this may happen to 
others as well leading to the infinite loop as you have mentioned 
previously, so do we need to maintain a schedule_round like we do in 
DRR? Like,
     - If the node is in the same round, bypass scheduling, go to 
rb_next(), either continue loop this round or end this round.
     - Increase the schedule_round at the schedule_start() only when the 
schedule_pos is NULL.

> 
>>> 
>>> We'd still need a check in resort_txq() then, but it would make it 
>>> safe
>>> to unschedule in return_txq()...
>> Yes, agree with that.
>> 
>> 
>>> 
>>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>>> local->schedule_pos[ac].
>>>>> 
>>>>> Not sure what you mean by this?
>>>> My bad. Please ignore this.
>>>> 
>>>> 
>>>>> 
>>>>>>>> In ath10k, we will usually push packets of first txq as many as 
>>>>>>>> we
>>>>>>>> can
>>>>>>>> until it is drained and then move to the next one. So if a txq
>>>>>>>> gets
>>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>>> during this period, neither vt of any station or global vt can 
>>>>>>>> be
>>>>>>>> updated due to lock protection.
>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> But in that case, we could fix it by just conditionally 
>>>>>>>>> assigning
>>>>>>>>> either
>>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? 
>>>>>>>>> I.e.,
>>>>>>>>> something like:
>>>>>>>>> 
>>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>>> schedule_pos[ac] will lead us to the next node of the one we 
>>>>>>>> want.
>>>>>>> 
>>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL,
>>>>>>> vs
>>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>>> scheduling round, while the latter ends the current round.
>>>>>>> 
>>>>>>> -Toke
>>>>>> 
>>>>>> --
>>>>>> Yibo
>>>> 
>>>> --
>>>> Yibo
>> 
>> --
>> Yibo

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-22  5:19                               ` Yibo Zhao
@ 2019-09-23 10:47                                 ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-23 10:47 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>>> removed from rbtree immediately in the
>>>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, defer
>>>>>>>>>>>>>>> the removal until right before the end of this schedule
>>>>>>>>>>>>>>> round.
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>>>> I'll
>>>>>>>>>>>>>> add
>>>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few
>>>>>>>>>>>>>> comments
>>>>>>>>>>>>>> first:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> diff --git a/include/net/mac80211.h 
>>>>>>>>>>>>>>> b/include/net/mac80211.h
>>>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>>>  {
>>>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty,
>>>>>>>>>>>>>>> it
>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>> be
>>>>>>>>>>>>>>> added
>>>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue 
>>>>>>>>>>>>>>> is
>>>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>>>> after the
>>>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void
>>>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> Check
>>>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>>>   */
>>>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, 
>>>>>>>>>>>>>>> u8
>>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  /**
>>>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>>> + */
>>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is
>>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check,
>>>>>>>>>>>>>>> 0);
>>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local 
>>>>>>>>>>>>>>> *local,
>>>>>>>>>>>>>>> int
>>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp,
>>>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, 
>>>>>>>>>>>>>>> sta);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>>>> +		else
>>>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to
>>>>>>>>>>>>>> hold
>>>>>>>>>>>>>> off
>>>>>>>>>>>>>> until this round):
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs 
>>>>>>>>>>>>>> are
>>>>>>>>>>>>>> added
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> list?
>>>>>>>>>>>>> Is it possible that a txq is not added to the remove list 
>>>>>>>>>>>>> but
>>>>>>>>>>>>> then
>>>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>>>> disconnects
>>>>>>>>>>>>> without any notification.
>>>>>>>>>>>> 
>>>>>>>>>>>> Well as long as all the other cleanup paths call directly 
>>>>>>>>>>>> into
>>>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>>>> scheduler
>>>>>>>>>>>> when
>>>>>>>>>>>> they disconnect etc.
>>>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is,
>>>>>>>>>>> say,
>>>>>>>>>>> we
>>>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>>>> weight
>>>>>>>>>>> compared with that of others. Suppose, after its chance of Tx,
>>>>>>>>>>> it
>>>>>>>>>>> is
>>>>>>>>>>> most likely to be placed in the rightmost(still has some 
>>>>>>>>>>> packets
>>>>>>>>>>> in
>>>>>>>>>>> the
>>>>>>>>>>> txq) and no more incoming data for it. The remaining packets 
>>>>>>>>>>> in
>>>>>>>>>>> txq
>>>>>>>>>>> will
>>>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am
>>>>>>>>>>> wrong)
>>>>>>>>>>> but
>>>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>>>> drained
>>>>>>>>>>> or
>>>>>>>>>>> global vt catch up with its vt. The staying time could be long
>>>>>>>>>>> if
>>>>>>>>>>> weight
>>>>>>>>>>> is extremely small. Then do we need timer to check or any 
>>>>>>>>>>> other
>>>>>>>>>>> better
>>>>>>>>>>> solution?
>>>>>>>>>> 
>>>>>>>>>> Ah, I see what you mean. No, I don't think this will be a
>>>>>>>>>> problem;
>>>>>>>>>> the
>>>>>>>>>> scenario you're describing would play out like this:
>>>>>>>>>> 
>>>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>>>> gets
>>>>>>>>>>    moved to the end of the rbtree (and stays there for a 
>>>>>>>>>> while).
>>>>>>>>>> 
>>>>>>>>>> 2. When we finally get to the point where this station gets
>>>>>>>>>> another
>>>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the 
>>>>>>>>>> last
>>>>>>>>>> packet
>>>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>>>> 
>>>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not
>>>>>>>>>> put
>>>>>>>>>> it
>>>>>>>>>>    back on the rbtree.
>>>>>>>>>> 
>>>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>>>> point
>>>>>>>>>> of
>>>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree
>>>>>>>>>> for
>>>>>>>>>> a
>>>>>>>>>> while, there is no harm in that: eventually it will get its 
>>>>>>>>>> turn,
>>>>>>>>>> it
>>>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>>>> Then that will be fine. Thanks for the explanation of the 
>>>>>>>>> dropping
>>>>>>>>> part
>>>>>>>>> in CoDel algorithm.
>>>>>>>> 
>>>>>>>> Yup, think so. And you're welcome :)
>>>>>>>> 
>>>>>>>>>> The issue we need to be concerned about is the opposite: If we
>>>>>>>>>> have
>>>>>>>>>> a
>>>>>>>>>> queue that *does* have packets queued, but which is *not*
>>>>>>>>>> scheduled
>>>>>>>>>> for
>>>>>>>>>> transmission, that will stall TX.
>>>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>>>> latency
>>>>>>>>> may somehow get impacted though.
>>>>>>>> 
>>>>>>>> Well, it should still stay on the rbtree as long as it has 
>>>>>>>> packets
>>>>>>>> queued. We don't have a check anywhere that reschedules TXQs 
>>>>>>>> whose
>>>>>>>> v_t
>>>>>>>> drops below global v_t...
>>>>>>>> 
>>>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, 
>>>>>>>>>> so
>>>>>>>>>> it
>>>>>>>>>> will
>>>>>>>>>> not clear out an entire queue with multiple packets in one go.
>>>>>>>>>> But
>>>>>>>>>> you
>>>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>>>> queue.
>>>>>>>>>> 
>>>>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>>>>> round"
>>>>>>>>>>>> (i.e.,
>>>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we
>>>>>>>>>>>> just
>>>>>>>>>>>> walk
>>>>>>>>>>>> the remove list in schedule_end() that should be enough, no?
>>>>>>>>>>>> 
>>>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just
>>>>>>>>>>>> to
>>>>>>>>>>>> have
>>>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>>>> 
>>>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is
>>>>>>>>>>>> currently
>>>>>>>>>>>> being
>>>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>> to
>>>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>>>> value,
>>>>>>>>>>> then in the next_txq() where we are going to use rb_next again
>>>>>>>>>>> and
>>>>>>>>>>> finally pick the next node of the node we really want. Is it
>>>>>>>>>>> fine
>>>>>>>>>>> to
>>>>>>>>>>> update schedule_pos to NULL?
>>>>>>>>>> 
>>>>>>>>>> Hmm, yeah, good point.
>>>>>>>>>> 
>>>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>>>> scheduling round, that will make next_txq() "start over", and 
>>>>>>>>>> do
>>>>>>>>>> another
>>>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>>>> where
>>>>>>>>>> things can oscillate back and forth between addition and 
>>>>>>>>>> removal
>>>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't
>>>>>>>>>> seem
>>>>>>>>>> to
>>>>>>>>>> convince myself that this can't happen.
>>>>>>>>> 
>>>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>>>> 
>>>>>>>>> txq_schedule_start();
>>>>>>>>> while(txq=next_txq()){
>>>>>>>>> ...
>>>>>>>>> return_txq(txq);
>>>>>>>>> }
>>>>>>>>> txq_schedule_end();
>>>>>>>>> 
>>>>>>>>> I do not see any chance of addition, no?
>>>>>>>> 
>>>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>>>> yeah,
>>>>>>>> we need to rebase this series to also incorporate that. I figure 
>>>>>>>> I
>>>>>>>> can
>>>>>>>> send an updated version of the first patch in the series once 
>>>>>>>> we've
>>>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>>>> 
>>>>>>> Oh, I was thinking we were discussing without locking reduced. 
>>>>>>> Yes,
>>>>>>> I
>>>>>>> also agree there might be a case causing infinite loop. With 
>>>>>>> locking
>>>>>>> reduced, the tree can be adjusted between next_txq() and
>>>>>>> return_txq()
>>>>>>> in
>>>>>>> the loop situation. For further discussion, let's consider,
>>>>>>> 1) the tree starts like:
>>>>>>>         A->B->C->D->E
>>>>>>> 2) then next_txq() returns A for dequeuing
>>>>>>> 3) driver dequeues A and drains A without any active txq locked,
>>>>>>> meaning
>>>>>>> the tree could be changed upon Tx completion.
>>>>>>> 4) then in return_txq(), the tree could be,
>>>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>> 
>>>>>>> with this change:
>>>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>> 
>>>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and 
>>>>>>> in
>>>>>>> next_txq(), rb_next(B) is what we returns which actually is C and 
>>>>>>> B
>>>>>>> is
>>>>>>> skipped, no?
>>>>>>> 
>>>>>>> Similarly for case ii, we skip B, C, D.
>>>>>> 
>>>>>> Yup, I think you're right. But we can fix this by making
>>>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e.,
>>>>>> if
>>>>>> resort_txq() acts on the txq that's currently in schedule_pos, it
>>>>>> will
>>>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>>>> (optionally after checking that the position of the node is 
>>>>>> actually
>>>>>> going to change).
>>>>> Sorry, please ignore the last email, which was sent by mistake.
>>>>> 
>>>>> I don't think it makes any difference with that in unschedule_txq().
>>>>> For
>>>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>>>> schedule_pos means previous candidate node whereas with your change,
>>>>> it
>>>>> looks like schedule_pos is current candidate node instead.
>>>> 
>>>> Hmm, that was not actually what I was thinking, but yeah I think 
>>>> you're
>>>> right that it would be easier to just change it so schedule_pos is
>>>> pointing to the next and not the current txq we want to schedule.
>>> So do you mean we can change next_txq like this,
>>> 
>>>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8
>>> ac)
>>>   {
>>>   	struct ieee80211_local *local = hw_to_local(hw);
>>> 	struct rb_node *node = local->schedule_pos[ac];
>>>   	struct txq_info *txqi = NULL;
>>> 	bool first = false;
>>> 
>>>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>>> 
>>> 	if (!node) {
>>> 	        node = rb_first_cached(&local->active_txqs[ac]);
>>> 		first = true;
>>> -	} else
>>> -		node = rb_next(node);
>>> +       }
>>> +
>>> 	if (!node)
>>>   		return NULL;
>> 
>> Ah, no, now I remember why this didn't work and I went with the other
>> approach: If you make this change, you also have to have this at the
>> end:
>> 
>> local->schedule_pos[ac] = rb_next(node);
>> 
>> 
>> But this means we can no longer distinguish between having gone through
>> the whole thing (so rb_next() returns NULL), or starting out with
>> nothing.
>> 
>> So, instead we need to keep next_txq() the way it is, and just add
>
> Right, should keep next_txq() the way it is.
>
>> 
>> local->schedule_pos[ac] = rb_prev(node);
>> 
>> whenever we remove a node (both in return_txq() and resort_txq()).
>
> Agree, and we may also need to consider a case where A is removed and
> soon added back, just as in ii),
>         B->C->A->D->E
> then B is scheduled, removed and soon added back,
>         C->A->B->D->E
> A and B will have a second chance to be scheduled and this may happen to
> others as well, leading to the infinite loop you mentioned
> previously. So do we need to maintain a schedule_round like we do in
> DRR? Like,
>      - If the node has already been scheduled in the current round,
> bypass scheduling it and go to rb_next(), either continuing the loop
> this round or ending this round.
>      - Increase the schedule_round at the schedule_start() only when the
> schedule_pos is NULL.

Hmm, yeah, I guess we could end up with a loop like that as well.
Keeping the schedule_round would be a way to fix it, but I'm not sure we
should just skip that station; maybe we should just end the round
instead?

>>>> We'd still need a check in resort_txq() then, but it would make it 
>>>> safe
>>>> to unschedule in return_txq()...
>>> Yes, agree with that.
>>> 
>>> 
>>>> 
>>>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>>>> local->schedule_pos[ac].
>>>>>> 
>>>>>> Not sure what you mean by this?
>>>>> My bad. Please ignore this.
>>>>> 
>>>>> 
>>>>>> 
>>>>>>>>> In ath10k, we will usually push packets of first txq as many as 
>>>>>>>>> we
>>>>>>>>> can
>>>>>>>>> until it is drained and then move to the next one. So if a txq
>>>>>>>>> gets
>>>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>>>> during this period, neither vt of any station or global vt can 
>>>>>>>>> be
>>>>>>>>> updated due to lock protection.
>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> But in that case, we could fix it by just conditionally 
>>>>>>>>>> assigning
>>>>>>>>>> either
>>>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? 
>>>>>>>>>> I.e.,
>>>>>>>>>> something like:
>>>>>>>>>> 
>>>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>>>> schedule_pos[ac] will lead us to the next node of the one we 
>>>>>>>>> want.
>>>>>>>> 
>>>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL,
>>>>>>>> vs
>>>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>>>> scheduling round, while the latter ends the current round.
>>>>>>>> 
>>>>>>>> -Toke
>>>>>>> 
>>>>>>> --
>>>>>>> Yibo
>>>>> 
>>>>> --
>>>>> Yibo
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-23 10:47                                 ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-23 10:47 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>> 
>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> In a loop txqs dequeue scenario, if the first txq in the
>>>>>>>>>>>>>>> rbtree
>>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>>> removed from rbtree immediately in the
>>>>>>>>>>>>>>> ieee80211_return_txq(),
>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>> loop will break soon in the ieee80211_next_txq() due to
>>>>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>>>>> not leading to the second txq in the rbtree. Thus, 
>>>>>>>>>>>>>>> deferring
>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>> removal right before the end of this schedule round.
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>>> Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
>>>>>>>>>>>>>>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> I didn't write this patch, so please don't use my sign-off.
>>>>>>>>>>>>>> I'll
>>>>>>>>>>>>>> add
>>>>>>>>>>>>>> ack or review tags as appropriate in reply; but a few
>>>>>>>>>>>>>> comments
>>>>>>>>>>>>>> first:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>  include/net/mac80211.h     | 16 ++++++++++--
>>>>>>>>>>>>>>>  net/mac80211/ieee80211_i.h |  3 +++
>>>>>>>>>>>>>>>  net/mac80211/main.c        |  6 +++++
>>>>>>>>>>>>>>>  net/mac80211/tx.c          | 63
>>>>>>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++++++---
>>>>>>>>>>>>>>>  4 files changed, 83 insertions(+), 5 deletions(-)
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> diff --git a/include/net/mac80211.h 
>>>>>>>>>>>>>>> b/include/net/mac80211.h
>>>>>>>>>>>>>>> index ac2ed8e..ba5a345 100644
>>>>>>>>>>>>>>> --- a/include/net/mac80211.h
>>>>>>>>>>>>>>> +++ b/include/net/mac80211.h
>>>>>>>>>>>>>>> @@ -925,6 +925,8 @@ struct ieee80211_tx_rate {
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  #define IEEE80211_MAX_TX_RETRY		31
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +#define IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS 100
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  static inline void ieee80211_rate_set_vht(struct
>>>>>>>>>>>>>>> ieee80211_tx_rate
>>>>>>>>>>>>>>> *rate,
>>>>>>>>>>>>>>>  					  u8 mcs, u8 nss)
>>>>>>>>>>>>>>>  {
>>>>>>>>>>>>>>> @@ -6232,7 +6234,8 @@ struct sk_buff
>>>>>>>>>>>>>>> *ieee80211_tx_dequeue(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>   * @ac: AC number to return packets from.
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>>   * Should only be called between calls to
>>>>>>>>>>>>>>> ieee80211_txq_schedule_start()
>>>>>>>>>>>>>>> - * and ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> + * and ieee80211_txq_schedule_end(). If the txq is empty,
>>>>>>>>>>>>>>> it
>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>> be
>>>>>>>>>>>>>>> added
>>>>>>>>>>>>>>> + * to a remove list and get removed later.
>>>>>>>>>>>>>>>   * Returns the next txq if successful, %NULL if no queue 
>>>>>>>>>>>>>>> is
>>>>>>>>>>>>>>> eligible.
>>>>>>>>>>>>>>> If a txq
>>>>>>>>>>>>>>>   * is returned, it should be returned with
>>>>>>>>>>>>>>> ieee80211_return_txq()
>>>>>>>>>>>>>>> after the
>>>>>>>>>>>>>>>   * driver has finished scheduling it.
>>>>>>>>>>>>>>> @@ -6268,7 +6271,8 @@ void
>>>>>>>>>>>>>>> ieee80211_txq_schedule_start(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw, u8 ac)
>>>>>>>>>>>>>>>   * @hw: pointer as obtained from ieee80211_alloc_hw()
>>>>>>>>>>>>>>>   * @ac: AC number to acquire locks for
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>> - * Release locks previously acquired by
>>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> + * Release locks previously acquired by
>>>>>>>>>>>>>>> ieee80211_txq_schedule_end().
>>>>>>>>>>>>>>> Check
>>>>>>>>>>>>>>> + * and remove the empty txq from rb-tree.
>>>>>>>>>>>>>>>   */
>>>>>>>>>>>>>>>  void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, 
>>>>>>>>>>>>>>> u8
>>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>>  	__releases(txq_lock);
>>>>>>>>>>>>>>> @@ -6287,6 +6291,14 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw, struct ieee80211_txq *txq)
>>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  /**
>>>>>>>>>>>>>>> + * ieee80211_txqs_check - Check txqs waiting for removal
>>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>>> + * @tmr: pointer as obtained from local
>>>>>>>>>>>>>>> + *
>>>>>>>>>>>>>>> + */
>>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *tmr);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +/**
>>>>>>>>>>>>>>>   * ieee80211_txq_may_transmit - check whether TXQ is
>>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>> transmit
>>>>>>>>>>>>>>>   *
>>>>>>>>>>>>>>>   * This function is used to check whether given txq is
>>>>>>>>>>>>>>> allowed
>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>> transmit by
>>>>>>>>>>>>>>> diff --git a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> index a4556f9..49aa143e 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> +++ b/net/mac80211/ieee80211_i.h
>>>>>>>>>>>>>>> @@ -847,6 +847,7 @@ struct txq_info {
>>>>>>>>>>>>>>>  	struct codel_stats cstats;
>>>>>>>>>>>>>>>  	struct sk_buff_head frags;
>>>>>>>>>>>>>>>  	struct rb_node schedule_order;
>>>>>>>>>>>>>>> +	struct list_head candidate;
>>>>>>>>>>>>>>>  	unsigned long flags;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	/* keep last! */
>>>>>>>>>>>>>>> @@ -1145,6 +1146,8 @@ struct ieee80211_local {
>>>>>>>>>>>>>>>  	u64 airtime_v_t[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>>  	u64 airtime_weight_sum[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	struct list_head remove_list[IEEE80211_NUM_ACS];
>>>>>>>>>>>>>>> +	struct timer_list remove_timer;
>>>>>>>>>>>>>>>  	u16 airtime_flags;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	const struct ieee80211_ops *ops;
>>>>>>>>>>>>>>> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
>>>>>>>>>>>>>>> index e9ffa8e..78fe24a 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/main.c
>>>>>>>>>>>>>>> +++ b/net/mac80211/main.c
>>>>>>>>>>>>>>> @@ -667,10 +667,15 @@ struct ieee80211_hw
>>>>>>>>>>>>>>> *ieee80211_alloc_hw_nm(size_t priv_data_len,
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
>>>>>>>>>>>>>>>  		local->active_txqs[i] = RB_ROOT_CACHED;
>>>>>>>>>>>>>>> +		INIT_LIST_HEAD(&local->remove_list[i]);
>>>>>>>>>>>>>>>  		spin_lock_init(&local->active_txq_lock[i]);
>>>>>>>>>>>>>>>  	}
>>>>>>>>>>>>>>>  	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	timer_setup(&local->remove_timer, ieee80211_txqs_check,
>>>>>>>>>>>>>>> 0);
>>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  	INIT_LIST_HEAD(&local->chanctx_list);
>>>>>>>>>>>>>>>  	mutex_init(&local->chanctx_mtx);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -1305,6 +1310,7 @@ void ieee80211_unregister_hw(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw)
>>>>>>>>>>>>>>>  	tasklet_kill(&local->tx_pending_tasklet);
>>>>>>>>>>>>>>>  	tasklet_kill(&local->tasklet);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	del_timer_sync(&local->remove_timer);
>>>>>>>>>>>>>>>  #ifdef CONFIG_INET
>>>>>>>>>>>>>>>  	unregister_inetaddr_notifier(&local->ifa_notifier);
>>>>>>>>>>>>>>>  #endif
>>>>>>>>>>>>>>> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
>>>>>>>>>>>>>>> index d00baaa..42ca010 100644
>>>>>>>>>>>>>>> --- a/net/mac80211/tx.c
>>>>>>>>>>>>>>> +++ b/net/mac80211/tx.c
>>>>>>>>>>>>>>> @@ -1450,6 +1450,7 @@ void ieee80211_txq_init(struct
>>>>>>>>>>>>>>> ieee80211_sub_if_data *sdata,
>>>>>>>>>>>>>>>  	codel_stats_init(&txqi->cstats);
>>>>>>>>>>>>>>>  	__skb_queue_head_init(&txqi->frags);
>>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>> +	INIT_LIST_HEAD(&txqi->candidate);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	txqi->txq.vif = &sdata->vif;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3724,6 +3725,9 @@ void ieee80211_schedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +	if (!list_empty(&txqi->candidate))
>>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order))
>>>>>>>>>>>>>>>  		goto out;
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3783,6 +3787,20 @@ static void
>>>>>>>>>>>>>>> __ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>  	RB_CLEAR_NODE(&txqi->schedule_order);
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +void ieee80211_remove_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>> +			  struct ieee80211_txq *txq)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>>> +	struct txq_info *txqi = to_txq_info(txq);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
>>>>>>>>>>>>>>> +		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +		list_del_init(&txqi->candidate);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
>>>>>>>>>>>>>>>  			      struct ieee80211_txq *txq)
>>>>>>>>>>>>>>>  	__acquires(txq_lock) __releases(txq_lock)
>>>>>>>>>>>>>>> @@ -3790,7 +3808,7 @@ void ieee80211_unschedule_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw *hw,
>>>>>>>>>>>>>>>  	struct ieee80211_local *local = hw_to_local(hw);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	spin_lock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> -	__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +	ieee80211_remove_txq(hw, txq);
>>>>>>>>>>>>>>>  	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> @@ -3803,11 +3821,48 @@ void ieee80211_return_txq(struct
>>>>>>>>>>>>>>> ieee80211_hw
>>>>>>>>>>>>>>> *hw,
>>>>>>>>>>>>>>>  	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>  	if (!RB_EMPTY_NODE(&txqi->schedule_order) &&
>>>>>>>>>>>>>>> -	    (skb_queue_empty(&txqi->frags) &&
>>>>>>>>>>>>>>> !txqi->tin.backlog_packets))
>>>>>>>>>>>>>>> -		__ieee80211_unschedule_txq(hw, txq);
>>>>>>>>>>>>>>> +		!txq_has_queue(&txqi->txq) &&
>>>>>>>>>>>>>>> +		list_empty(&txqi->candidate))
>>>>>>>>>>>>>>> +		list_add_tail(&txqi->candidate,
>>>>>>>>>>>>>>> &local->remove_list[txq->ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>  }
>>>>>>>>>>>>>>>  EXPORT_SYMBOL(ieee80211_return_txq);
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> +void __ieee80211_check_txqs(struct ieee80211_local 
>>>>>>>>>>>>>>> *local,
>>>>>>>>>>>>>>> int
>>>>>>>>>>>>>>> ac)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	lockdep_assert_held(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	list_for_each_entry_safe(iter, tmp,
>>>>>>>>>>>>>>> &local->remove_list[ac],
>>>>>>>>>>>>>>> +				 candidate) {
>>>>>>>>>>>>>>> +		sta = container_of(iter->txq.sta, struct sta_info, 
>>>>>>>>>>>>>>> sta);
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +		if (txq_has_queue(&iter->txq))
>>>>>>>>>>>>>>> +			list_del_init(&iter->candidate);
>>>>>>>>>>>>>>> +		else
>>>>>>>>>>>>>>> +			ieee80211_remove_txq(&local->hw, &iter->txq);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +void ieee80211_txqs_check(struct timer_list *t)
>>>>>>>>>>>>>>> +{
>>>>>>>>>>>>>>> +	struct ieee80211_local *local = from_timer(local, t,
>>>>>>>>>>>>>>> remove_timer);
>>>>>>>>>>>>>>> +	struct txq_info *iter, *tmp;
>>>>>>>>>>>>>>> +	struct sta_info *sta;
>>>>>>>>>>>>>>> +	int ac;
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
>>>>>>>>>>>>>>> +		spin_lock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +		__ieee80211_check_txqs(local, ac);
>>>>>>>>>>>>>>> +		spin_unlock_bh(&local->active_txq_lock[ac]);
>>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +	mod_timer(&local->remove_timer,
>>>>>>>>>>>>>>> +		  jiffies +
>>>>>>>>>>>>>>> msecs_to_jiffies(IEEE80211_AIRTIME_TXQ_RM_CHK_INTV_IN_MS));
>>>>>>>>>>>>>>> +}
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> I'll ask the same as I did last time (where you told me to
>>>>>>>>>>>>>> hold
>>>>>>>>>>>>>> off
>>>>>>>>>>>>>> until this round):
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Why do you need the timer and the periodic check? If TXQs 
>>>>>>>>>>>>>> are
>>>>>>>>>>>>>> added
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> the remove list during the scheduling run, and
>>>>>>>>>>>>>> __ieee80211_check_txqs()
>>>>>>>>>>>>>> is run from schedule_end(), isn't that sufficient to clear
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> list?
>>>>>>>>>>>>> Is it possible that a txq is not added to the remove list 
>>>>>>>>>>>>> but
>>>>>>>>>>>>> then
>>>>>>>>>>>>> packets in it are dropped by fq_codel algo? Like the station
>>>>>>>>>>>>> disconnects
>>>>>>>>>>>>> without any notification.
>>>>>>>>>>>> 
>>>>>>>>>>>> Well as long as all the other cleanup paths call directly 
>>>>>>>>>>>> into
>>>>>>>>>>>> __unschedule_txq(), that should remove stations from the
>>>>>>>>>>>> scheduler
>>>>>>>>>>>> when
>>>>>>>>>>>> they disconnect etc.
>>>>>>>>>>> Yes, the disconnect scenario is a bad example. My concern is,
>>>>>>>>>>> say,
>>>>>>>>>>> we
>>>>>>>>>>> have 10 stations and only one of them is assigned a very small
>>>>>>>>>>> weight
>>>>>>>>>>> compared with that of others. Suppose, after its chance of Tx,
>>>>>>>>>>> it
>>>>>>>>>>> is
>>>>>>>>>>> most likely to be placed in the rightmost(still has some 
>>>>>>>>>>> packets
>>>>>>>>>>> in
>>>>>>>>>>> the
>>>>>>>>>>> txq) and no more incoming data for it. The remaining packets 
>>>>>>>>>>> in
>>>>>>>>>>> txq
>>>>>>>>>>> will
>>>>>>>>>>> be dropped due to timeout algo in codel(correct me if I am
>>>>>>>>>>> wrong)
>>>>>>>>>>> but
>>>>>>>>>>> this empty txq will stay on the rbtree until other txqs get
>>>>>>>>>>> drained
>>>>>>>>>>> or
>>>>>>>>>>> global vt catch up with its vt. The staying time could be long
>>>>>>>>>>> if
>>>>>>>>>>> weight
>>>>>>>>>>> is extremely small. Then do we need timer to check or any 
>>>>>>>>>>> other
>>>>>>>>>>> better
>>>>>>>>>>> solution?
>>>>>>>>>> 
>>>>>>>>>> Ah, I see what you mean. No, I don't think this will be a
>>>>>>>>>> problem;
>>>>>>>>>> the
>>>>>>>>>> scenario you're describing would play out like this:
>>>>>>>>>> 
>>>>>>>>>> 1. Station ends transmitting, still has a single packet queued,
>>>>>>>>>> gets
>>>>>>>>>>    moved to the end of the rbtree (and stays there for a 
>>>>>>>>>> while).
>>>>>>>>>> 
>>>>>>>>>> 2. When we finally get to the point where this station gets
>>>>>>>>>> another
>>>>>>>>>>    chance to transmit, the CoDel drop timer triggers and the 
>>>>>>>>>> last
>>>>>>>>>> packet
>>>>>>>>>>    is dropped[0]. This means that the queue will just be empty
>>>>>>>>>>    (and ieee80211_tx_dequeue() will return NULL).
>>>>>>>>>> 
>>>>>>>>>> 3. Because the queue is empty, ieee80211_return_txq() will not
>>>>>>>>>> put
>>>>>>>>>> it
>>>>>>>>>>    back on the rbtree.
>>>>>>>>>> 
>>>>>>>>>> Crucially, in 2. the CoDel algorithm doesn't kick in until the
>>>>>>>>>> point
>>>>>>>>>> of
>>>>>>>>>> packet dequeue. But even if an empty queue stays on the rbtree
>>>>>>>>>> for
>>>>>>>>>> a
>>>>>>>>>> while, there is no harm in that: eventually it will get its 
>>>>>>>>>> turn,
>>>>>>>>>> it
>>>>>>>>>> will turn out to be empty, and just be skipped over.
>>>>>>>>> Then that will be fine. Thanks for the explanation of the 
>>>>>>>>> dropping
>>>>>>>>> part
>>>>>>>>> in CoDel algorithm.
>>>>>>>> 
>>>>>>>> Yup, think so. And you're welcome :)
>>>>>>>> 
>>>>>>>>>> The issue we need to be concerned about is the opposite: If we
>>>>>>>>>> have
>>>>>>>>>> a
>>>>>>>>>> queue that *does* have packets queued, but which is *not*
>>>>>>>>>> scheduled
>>>>>>>>>> for
>>>>>>>>>> transmission, that will stall TX.
>>>>>>>>> Is it by design since its vt is more than global vt, right? The
>>>>>>>>> latency
>>>>>>>>> may somehow get impacted though.
>>>>>>>> 
>>>>>>>> Well, it should still stay on the rbtree as long as it has 
>>>>>>>> packets
>>>>>>>> queued. We don't have a check anywhere that reschedules TXQs 
>>>>>>>> whose
>>>>>>>> v_t
>>>>>>>> drops below global v_t...
>>>>>>>> 
>>>>>>>>>> [0] CoDel in most cases only drops a single packet at a time, 
>>>>>>>>>> so
>>>>>>>>>> it
>>>>>>>>>> will
>>>>>>>>>> not clear out an entire queue with multiple packets in one go.
>>>>>>>>>> But
>>>>>>>>>> you
>>>>>>>>>> are right that it could conceivably drop the last packet in a
>>>>>>>>>> queue.
>>>>>>>>>> 
>>>>>>>>>>>> We only need to defer removal inside a single "scheduling
>>>>>>>>>>>> round"
>>>>>>>>>>>> (i.e.,
>>>>>>>>>>>> between a pair of ieee80211_txq_schedule_start/end. So if we
>>>>>>>>>>>> just
>>>>>>>>>>>> walk
>>>>>>>>>>>> the remove list in schedule_end() we should be enough, no?
>>>>>>>>>>>> 
>>>>>>>>>>>> Hmm, or maybe a simpler way to fix the original issue is just
>>>>>>>>>>>> to
>>>>>>>>>>>> have
>>>>>>>>>>>> unschedule_txq() update the schedule_pos() pointer?
>>>>>>>>>>>> 
>>>>>>>>>>>> I.e., unschedule_txq checks if the txq being removed is
>>>>>>>>>>>> currently
>>>>>>>>>>>> being
>>>>>>>>>>>> pointed to by schedule_pos[ac], and if it is, it updates
>>>>>>>>>>>> schedule_pos
>>>>>>>>>>>> to
>>>>>>>>>>>> be the rb_next of the current value?
>>>>>>>>>>> Actually, if schedule_pos is updated to rb_next of the current
>>>>>>>>>>> value,
>>>>>>>>>>> then in the next_txq() where we are going to use rb_next again
>>>>>>>>>>> and
>>>>>>>>>>> finally pick the next node of the node we really want. Is it
>>>>>>>>>>> fine
>>>>>>>>>>> to
>>>>>>>>>>> update schedule_pos to NULL?
>>>>>>>>>> 
>>>>>>>>>> Hmm, yeah, good point.
>>>>>>>>>> 
>>>>>>>>>> If we do end up setting schedule_pos to NULL in the middle of a
>>>>>>>>>> scheduling round, that will make next_txq() "start over", and 
>>>>>>>>>> do
>>>>>>>>>> another
>>>>>>>>>> loop through the whole thing. I guess we may be able to hit a case
>>>>>>>>>> where
>>>>>>>>>> things can oscillate back and forth between addition and 
>>>>>>>>>> removal
>>>>>>>>>> resulting in an infinite loop? Not sure, but at least I can't
>>>>>>>>>> seem
>>>>>>>>>> to
>>>>>>>>>> convince myself that this can't happen.
>>>>>>>>> 
>>>>>>>>> As the loop of next_txq under lock protection as below,
>>>>>>>>> 
>>>>>>>>> txq_schedule_start();
>>>>>>>>> while(txq=next_txq()){
>>>>>>>>> ...
>>>>>>>>> return_txq(txq);
>>>>>>>>> }
>>>>>>>>> txq_schedule_end();
>>>>>>>>> 
>>>>>>>>> I do not see any chance of addition, no?
>>>>>>>> 
>>>>>>>> As you noted in your other email, Felix reduced the locking. And
>>>>>>>> yeah,
>>>>>>>> we need to rebase this series to also incorporate that. I figure 
>>>>>>>> I
>>>>>>>> can
>>>>>>>> send an updated version of the first patch in the series once 
>>>>>>>> we've
>>>>>>>> worked out the remaining issues with your follow-up patches.
>>>>>>>> 
>>>>>>> Oh, I was thinking we were discussing without locking reduced. 
>>>>>>> Yes,
>>>>>>> I
>>>>>>> also agree there might be a case causing infinite loop. With 
>>>>>>> locking
>>>>>>> reduced, the tree can be adjusted between next_txq() and
>>>>>>> return_txq()
>>>>>>> in
>>>>>>> the loop situation. For further discussion, let's consider,
>>>>>>> 1) the tree starts like:
>>>>>>>         A->B->C->D->E
>>>>>>> 2) then next_txq() returns A for dequeuing
>>>>>>> 3) driver dequeues A and drains A without any active txq locked
>>>>>>> meaning
>>>>>>> the tree could be changed upon Tx completion.
>>>>>>> 4) then in return_txq(), the tree could be,
>>>>>>>         i   A->B->C->D->E (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>>         ii  B->C->A->D->E (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>>         iii B->C->D->E->A (A is empty, and maybe soon be added 
>>>>>>> back
>>>>>>> before the loop end)
>>>>>>> 
>>>>>>> with this change:
>>>>>>>   local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>> 
>>>>>>> for case i, local->schedule_pos[ac] is rb_next(A) which is B, and 
>>>>>>> in
>>>>>>> next_txq(), rb_next(B) is what we returns which actually is C and 
>>>>>>> B
>>>>>>> is
>>>>>>> skipped, no?
>>>>>>> 
>>>>>>> Similar for case ii, we skip B, C, D.
>>>>>> 
>>>>>> Yup, I think you're right. But we can fix this by making
>>>>>> ieee80211_resort_txq() aware of the schedule_pos as well, no? I.e.,
>>>>>> if
>>>>>> resort_txq() acts on the txq that's currently in schedule_pos, it
>>>>>> will
>>>>>> update schedule pos with the same rb_next(node) ?: rb_prev(node);
>>>>>> (optionally after checking that the position of the node is 
>>>>>> actually
>>>>>> going to change).
>>>>> Sorry, please ignore last email sent by mistake.
>>>>> 
>>>>> I don't think it makes any difference with that in unschedule_txq().
>>>>> For
>>>>> case i, it finally picks C as well in next_txq(). For next_txq(),
>>>>> schedule_pos means previous candidate node whereas with your change,
>>>>> it
>>>>> looks like schedule_pos is current candidate node instead.
>>>> 
>>>> Hmm, that was not actually what I was thinking, but yeah I think 
>>>> you're
>>>> right that it would be easier to just change it so schedule_pos is
>>>> pointing to the next and not the current txq we want to schedule.
>>> So do you mean we can change next_txq like this,
>>> 
>>>   struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8
>>> ac)
>>>   {
>>>   	struct ieee80211_local *local = hw_to_local(hw);
>>> 	struct rb_node *node = local->schedule_pos[ac];
>>>   	struct txq_info *txqi = NULL;
>>> 	bool first = false;
>>> 
>>>   	lockdep_assert_held(&local->active_txq_lock[ac]);
>>> 
>>> 	if (!node) {
>>> 	        node = rb_first_cached(&local->active_txqs[ac]);
>>> 		first = true;
>>> -	} else
>>> -		node = rb_next(node);
>>> +       }
>>> +
>>> 	if (!node)
>>>   		return NULL;
>> 
>> Ah, no, now I remember why this didn't work and I went with the other
>> approach: If you make this change, you also have to have this at the
>> end:
>> 
>> local->schedule_pos[ac] = rb_next(node);
>> 
>> 
>> But this means we can no longer distinguish between having gone through
>> the whole thing (so rb_next() returns NULL), or starting out with
>> nothing.
>> 
>> So, instead we need to keep next_txq() the way it is, and just add
>
> Right, should keep next_txq() the way it is.
>
>> 
>> local->schedule_pos[ac] = rb_prev(node);
>> 
>> whenever we remove a node (both in return_txq() and resort_txq()).
>
> Agree, and also we may need to consider case like A is removed and soon 
> be added back just the same as ii),
>         B->C->A->D->E
> then B is scheduled, removed and soon added back,
>         C->A->B->D->E
> A and B will have a second chance to be scheduled and this may happen to 
> others as well leading to the infinite loop as you have mentioned 
> previously, so do we need to maintain a schedule_round like we do in 
> DRR? Like,
>      - If the node is in the same round, bypass scheduling, go to 
> rb_next(), either continue loop this round or end this round.
>      - Increase the schedule_round at the schedule_start() only when the 
> schedule_pos is NULL.

Hmm, yeah, I guess we could end up with a loop like that as well.
Keeping the schedule_round would be a way to fix it, but I'm not sure we
should just skip that station; maybe we should just end the round
instead?

>>>> We'd still need a check in resort_txq() then, but it would make it 
>>>> safe
>>>> to unschedule in return_txq()...
>>> Yes, agree with that.
>>> 
>>> 
>>>> 
>>>>>>> Also I am wondering if there will be some SMP issues relating with
>>>>>>> local->schedule_pos[ac].
>>>>>> 
>>>>>> Not sure what you mean by this?
>>>>> My bad. Please ignore this.
>>>>> 
>>>>> 
>>>>>> 
>>>>>>>>> In ath10k, we will usually push packets of first txq as many as 
>>>>>>>>> we
>>>>>>>>> can
>>>>>>>>> until it is drained and then move to the next one. So if a txq
>>>>>>>>> gets
>>>>>>>>> removed in the return_txq, it should always be the leftmost. And
>>>>>>>>> during this period, neither vt of any station or global vt can 
>>>>>>>>> be
>>>>>>>>> updated due to lock protection.
>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> But in that case, we could fix it by just conditionally 
>>>>>>>>>> assigning
>>>>>>>>>> either
>>>>>>>>>> rb_next or rb_prev to the schedule_pos in unschedule_txq()? 
>>>>>>>>>> I.e.,
>>>>>>>>>> something like:
>>>>>>>>>> 
>>>>>>>>>> local->schedule_pos[ac] = rb_next(node) ?: rb_prev(node);
>>>>>>>>> I am not sure I am getting your point. Still in next_txq,
>>>>>>>>> schedule_pos[ac] will lead us to the next node of the one we 
>>>>>>>>> want.
>>>>>>>> 
>>>>>>>> The logic in next_txq is different when schedule_pos[ac] is NULL,
>>>>>>>> vs
>>>>>>>> when rb_next(schedule_pos[ac]) is NULL. The former restarts a new
>>>>>>>> scheduling round, while the latter ends the current round.
>>>>>>>> 
>>>>>>>> -Toke
>>>>>>> 
>>>>>>> --
>>>>>>> Yibo
>>>>> 
>>>>> --
>>>>> Yibo
>>> 
>>> --
>>> Yibo
>
> -- 
> Yibo


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-23 10:47                                 ` Toke Høiland-Jørgensen
@ 2019-09-23 11:42                                   ` Kalle Valo
  -1 siblings, 0 replies; 86+ messages in thread
From: Kalle Valo @ 2019-09-23 11:42 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Yibo Zhao, linux-wireless, ath10k

Toke Høiland-Jørgensen <toke@redhat.com> writes:

> Yibo Zhao <yiboz@codeaurora.org> writes:
>
>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:

Guys, PLEASE please consider us poor maintainers drowning in email and
edit your quotes :) This style of discussion makes patchwork unusable:

https://patchwork.kernel.org/patch/11147019/

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-23 11:42                                   ` Kalle Valo
  0 siblings, 0 replies; 86+ messages in thread
From: Kalle Valo @ 2019-09-23 11:42 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Yibo Zhao, linux-wireless, ath10k

Toke Høiland-Jørgensen <toke@redhat.com> writes:

> Yibo Zhao <yiboz@codeaurora.org> writes:
>
>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> 
>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:

Guys, PLEASE please consider us poor maintainers drowning in email and
edit your quotes :) This style of discussion makes patchwork unusable:

https://patchwork.kernel.org/patch/11147019/

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-23 11:42                                   ` Kalle Valo
@ 2019-09-23 16:39                                     ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-23 16:39 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Yibo Zhao, linux-wireless, ath10k

Kalle Valo <kvalo@codeaurora.org> writes:

> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>
>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>
> Guys, PLEASE please consider us poor maintainers drowning in email and
> edit your quotes :) This style of discussion makes patchwork unusable:
>
> https://patchwork.kernel.org/patch/11147019/

Heh, oops, didn't realise you were following the discussion from
patchwork; sorry, will be sure to cut things in the future.

The quote marks do make a very nice (reverse) christmas tree, though ;)

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-23 16:39                                     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-23 16:39 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Yibo Zhao, linux-wireless, ath10k

Kalle Valo <kvalo@codeaurora.org> writes:

> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>
>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>
>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>> 
>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>
> Guys, PLEASE please consider us poor maintainers drowning in email and
> edit your quotes :) This style of discussion makes patchwork unusable:
>
> https://patchwork.kernel.org/patch/11147019/

Heh, oops, didn't realise you were following the discussion from
patchwork; sorry, will be sure to cut things in the future.

The quote marks do make a very nice (reverse) christmas tree, though ;)

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-23 10:47                                 ` Toke Høiland-Jørgensen
@ 2019-09-24  2:45                                   ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-24  2:45 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-23 18:47, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> So, instead we need to keep next_txq() the way it is, and just add
>> 
>> Right, should keep next_txq() the way it is.
>> 
>>> 
>>> local->schedule_pos[ac] = rb_prev(node);
>>> 
>>> whenever we remove a node (both in return_txq() and resort_txq()).
>> 
>> Agree, and also we may need to consider case like A is removed and 
>> soon
>> be added back just the same as ii),
>>         B->C->A->D->E
>> then B is schedule, removed and soon added back,
>>         C->A->B->D->E
>> A and B will have a second chance to be scheduled and this may happen 
>> to
>> others as well leading to the infinite loop as you have mentioned
>> previously, so do we need to maintain a schedule_round like we do in
>> DRR? Like,
>>      - If the node is in the same round, by pass schedule, go to
>> rb_next(), either continue loop this round or end this round.
>>      - Increase the schedule_round at the schedule_start() only when 
>> the
>> schedule_pos is NULL.
> 
> Hmm, yeah, I guess we could end up with a loop like that as well.
> Keeping the schedule_round would be a way to fix it, but I'm not sure 
> we
> should just skip that station; maybe we should just end the round
> instead?
I am not sure. I believe, in some cases, the rest of the nodes which 
could be most of the nodes in the tree will not have the chance to be 
scheduled in this round.

> 
>>>>> We'd still need a check in resort_txq() then, but it would make it
>>>>> safe
>>>>> to unschedule in return_txq()...
>>>> Yes, agree with that.
>>>> 



-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  2:45                                   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-24  2:45 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-23 18:47, Toke Høiland-Jørgensen wrote:
> Yibo Zhao <yiboz@codeaurora.org> writes:
>>> So, instead we need to keep next_txq() the way it is, and just add
>> 
>> Right, should keep next_txq() the way it is.
>> 
>>> 
>>> local->schedule_pos[ac] = rb_prev(node);
>>> 
>>> whenever we remove a node (both in return_txq() and resort_txq()).
>> 
>> Agree, and also we may need to consider case like A is removed and 
>> soon
>> be added back just the same as ii),
>>         B->C->A->D->E
>> then B is schedule, removed and soon added back,
>>         C->A->B->D->E
>> A and B will have a second chance to be scheduled and this may happen 
>> to
>> others as well leading to the infinite loop as you have mentioned
>> previously, so do we need to maintain a schedule_round like we do in
>> DRR? Like,
>>      - If the node is in the same round, by pass schedule, go to
>> rb_next(), either continue loop this round or end this round.
>>      - Increase the schedule_round at the schedule_start() only when 
>> the
>> schedule_pos is NULL.
> 
> Hmm, yeah, I guess we could end up with a loop like that as well.
> Keeping the schedule_round would be a way to fix it, but I'm not sure 
> we
> should just skip that station; maybe we should just end the round
> instead?
I am not sure. I believe, in some cases, the rest of the nodes which 
could be most of the nodes in the tree will not have the chance to be 
scheduled in this round.

> 
>>>>> We'd still need a check in resort_txq() then, but it would make it
>>>>> safe
>>>>> to unschedule in return_txq()...
>>>> Yes, agree with that.
>>>> 



-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-23 16:39                                     ` Toke Høiland-Jørgensen
@ 2019-09-24  5:27                                       ` Kalle Valo
  -1 siblings, 0 replies; 86+ messages in thread
From: Kalle Valo @ 2019-09-24  5:27 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Yibo Zhao, linux-wireless, ath10k

Toke Høiland-Jørgensen <toke@redhat.com> writes:

> Kalle Valo <kvalo@codeaurora.org> writes:
>
>> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>>
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>
>>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>
>> Guys, PLEASE please consider us poor maintainers drowning in email and
>> edit your quotes :) This style of discussion makes patchwork unusable:
>>
>> https://patchwork.kernel.org/patch/11147019/
>
> Heh, oops, didn't realise you were following the discussion from
> patchwork; sorry, will be sure to cut things in the future.

To be honest, I'm not sure how much Johannes uses patchwork. But I check
everything from patchwork 95% of the time and try to keep my email boxes
clean.

> The quote marks do make a very nice (reverse) christmas tree, though ;)

It did! I had to include that to my rant :)

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  5:27                                       ` Kalle Valo
  0 siblings, 0 replies; 86+ messages in thread
From: Kalle Valo @ 2019-09-24  5:27 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Yibo Zhao, linux-wireless, ath10k

Toke Høiland-Jørgensen <toke@redhat.com> writes:

> Kalle Valo <kvalo@codeaurora.org> writes:
>
>> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>>
>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>
>>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>> 
>>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>> 
>>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>> 
>>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>> 
>>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>
>> Guys, PLEASE please consider us poor maintainers drowning in email and
>> edit your quotes :) This style of discussion makes patchwork unusable:
>>
>> https://patchwork.kernel.org/patch/11147019/
>
> Heh, oops, didn't realise you were following the discussion from
> patchwork; sorry, will be sure to cut things in the future.

To be honest, I'm not sure how much Johannes uses patchwork. But I check
everything from patchwork 95% of the time and try to keep my email boxes
clean.

> The quote marks do make a very nice (reverse) christmas tree, though ;)

It did! I had to include that to my rant :)

-- 
Kalle Valo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-24  5:27                                       ` Kalle Valo
@ 2019-09-24  7:23                                         ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  7:23 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Yibo Zhao, linux-wireless, ath10k

Kalle Valo <kvalo@codeaurora.org> writes:

> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>
>> Kalle Valo <kvalo@codeaurora.org> writes:
>>
>>> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>>>
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>
>>>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>
>>> Guys, PLEASE please consider us poor maintainers drowning in email and
>>> edit your quotes :) This style of discussion makes patchwork unusable:
>>>
>>> https://patchwork.kernel.org/patch/11147019/
>>
>> Heh, oops, didn't realise you were following the discussion from
>> patchwork; sorry, will be sure to cut things in the future.
>
> To be honest, I'm not sure how much Johannes uses patchwork. But I
> check everything from patchwork 95% of the time and try to keep my
> email boxes clean.

Noted. I'll try to be nice to patchwork, then :)

>> The quote marks do make a very nice (reverse) christmas tree, though ;)
>
> It did! I had to include that to my rant :)

:D

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  7:23                                         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  7:23 UTC (permalink / raw)
  To: Kalle Valo; +Cc: Yibo Zhao, linux-wireless, ath10k

Kalle Valo <kvalo@codeaurora.org> writes:

> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>
>> Kalle Valo <kvalo@codeaurora.org> writes:
>>
>>> Toke Høiland-Jørgensen <toke@redhat.com> writes:
>>>
>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>
>>>>> On 2019-09-21 22:00, Toke Høiland-Jørgensen wrote:
>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>> 
>>>>>>> On 2019-09-21 21:02, Toke Høiland-Jørgensen wrote:
>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>> 
>>>>>>>>> On 2019-09-21 19:27, Toke Høiland-Jørgensen wrote:
>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>> 
>>>>>>>>>>> On 2019-09-20 17:15, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>> 
>>>>>>>>>>>>> On 2019-09-19 18:37, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>> On 2019-09-18 19:23, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>>> On 2019-09-18 05:10, Toke Høiland-Jørgensen wrote:
>>>>>>>>>>>>>>>>>> Yibo Zhao <yiboz@codeaurora.org> writes:
>>>
>>> Guys, PLEASE please consider us poor maintainers drowning in email and
>>> edit your quotes :) This style of discussion makes patchwork unusable:
>>>
>>> https://patchwork.kernel.org/patch/11147019/
>>
>> Heh, oops, didn't realise you were following the discussion from
>> patchwork; sorry, will be sure to cut things in the future.
>
> To be honest, I'm not sure how much Johannes uses patchwork. But I
> check everything from patchwork 95% of the time and try to keep my
> email boxes clean.

Noted. I'll try to be nice to patchwork, then :)

>> The quote marks do make a very nice (reverse) christmas tree, though ;)
>
> It did! I had to include that to my rant :)

:D

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-24  2:45                                   ` Yibo Zhao
@ 2019-09-24  7:26                                     ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  7:26 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

>> Hmm, yeah, I guess we could end up with a loop like that as well.
>> Keeping the schedule_round would be a way to fix it, but I'm not sure 
>> we
>> should just skip that station; maybe we should just end the round
>> instead?
> I am not sure. I believe, in some cases, the rest of the nodes which 
> could be most of the nodes in the tree will not have the chance to be 
> scheduled in this round.

My guess would be that it doesn't really matter, because in most cases
each schedule round will only actually end up queueing packets from one
or two stations; as the driver will pull multiple packets from that one
station which will often fill up the firmware queues (especially once we
start throttling that with the AQL stuff).

So I guess we can just skip TXQs that we've already seen this scheduling
round, and let the v_t compare determine transmit eligibility :)

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  7:26                                     ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  7:26 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

>> Hmm, yeah, I guess we could end up with a loop like that as well.
>> Keeping the schedule_round would be a way to fix it, but I'm not sure 
>> we
>> should just skip that station; maybe we should just end the round
>> instead?
> I am not sure. I believe, in some cases, the rest of the nodes which 
> could be most of the nodes in the tree will not have the chance to be 
> scheduled in this round.

My guess would be that it doesn't really matter, because in most cases
each schedule round will only actually end up queueing packets from one
or two stations; as the driver will pull multiple packets from that one
station which will often fill up the firmware queues (especially once we
start throttling that with the AQL stuff).

So I guess we can just skip TXQs that we've already seen this scheduling
round, and let the v_t compare determine transmit eligibility :)

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-24  7:26                                     ` Toke Høiland-Jørgensen
@ 2019-09-24  8:31                                       ` Yibo Zhao
  -1 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-24  8:31 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: ath10k, linux-wireless, linux-wireless-owner

On 2019-09-24 15:26, Toke Høiland-Jørgensen wrote:
>>> Hmm, yeah, I guess we could end up with a loop like that as well.
>>> Keeping the schedule_round would be a way to fix it, but I'm not sure
>>> we
>>> should just skip that station; maybe we should just end the round
>>> instead?
>> I am not sure. I believe, in some cases, the rest of the nodes which
>> could be most of the nodes in the tree will not have the chance to be
>> scheduled in this round.
> 
> My guess would be that it doesn't really matter, because in most cases
> each schedule round will only actually end up queueing packets from one
> or two stations; as the driver will pull multiple packets from that one
> station which will often fill up the firmware queues (especially once 
> we
> start throttling that with the AQL stuff).
> 
> So I guess we can just skip TXQs that we've already seen this 
> scheduling
> round, and let the v_t compare determine transmit eligibility :)

I am a little confused. So do you mean it is fine for you to skip the 
TXQs we met in this round before and continue the loop until the end or 
vt comparison failure?

> 
> -Toke

-- 
Yibo

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  8:31                                       ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-09-24  8:31 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen
  Cc: linux-wireless-owner, linux-wireless, ath10k

On 2019-09-24 15:26, Toke Høiland-Jørgensen wrote:
>>> Hmm, yeah, I guess we could end up with a loop like that as well.
>>> Keeping the schedule_round would be a way to fix it, but I'm not sure
>>> we
>>> should just skip that station; maybe we should just end the round
>>> instead?
>> I am not sure. I believe, in some cases, the rest of the nodes which
>> could be most of the nodes in the tree will not have the chance to be
>> scheduled in this round.
> 
> My guess would be that it doesn't really matter, because in most cases
> each schedule round will only actually end up queueing packets from one
> or two stations; as the driver will pull multiple packets from that one
> station which will often fill up the firmware queues (especially once 
> we
> start throttling that with the AQL stuff).
> 
> So I guess we can just skip TXQs that we've already seen this 
> scheduling
> round, and let the v_t compare determine transmit eligibility :)

I am a little confused. So do you mean it is fine for you to skip the 
TXQs we met in this round before and continue the loop until the end or 
vt comparison failure?

> 
> -Toke

-- 
Yibo

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
  2019-09-24  8:31                                       ` Yibo Zhao
@ 2019-09-24  8:44                                         ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  8:44 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: ath10k, linux-wireless, linux-wireless-owner

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-24 15:26, Toke Høiland-Jørgensen wrote:
>>>> Hmm, yeah, I guess we could end up with a loop like that as well.
>>>> Keeping the schedule_round would be a way to fix it, but I'm not sure
>>>> we
>>>> should just skip that station; maybe we should just end the round
>>>> instead?
>>> I am not sure. I believe, in some cases, the rest of the nodes which
>>> could be most of the nodes in the tree will not have the chance to be
>>> scheduled in this round.
>> 
>> My guess would be that it doesn't really matter, because in most cases
>> each schedule round will only actually end up queueing packets from one
>> or two stations; as the driver will pull multiple packets from that one
>> station which will often fill up the firmware queues (especially once 
>> we
>> start throttling that with the AQL stuff).
>> 
>> So I guess we can just skip TXQs that we've already seen this 
>> scheduling
>> round, and let the v_t compare determine transmit eligibility :)
>
> I am a little confused. So do you mean it is fine for you to skip the 
> TXQs we met in this round before and continue the loop until the end or 
> vt comparison failure?

Yeah. In most cases it won't make any difference; but it'll make sure we
visit all eligible TXQs in all cases, so we might as well do that if
we're tracking the scheduling round anyway.

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 2/4] mac80211: defer txqs removal from rbtree
@ 2019-09-24  8:44                                         ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-09-24  8:44 UTC (permalink / raw)
  To: Yibo Zhao; +Cc: linux-wireless-owner, linux-wireless, ath10k

Yibo Zhao <yiboz@codeaurora.org> writes:

> On 2019-09-24 15:26, Toke Høiland-Jørgensen wrote:
>>>> Hmm, yeah, I guess we could end up with a loop like that as well.
>>>> Keeping the schedule_round would be a way to fix it, but I'm not sure
>>>> we
>>>> should just skip that station; maybe we should just end the round
>>>> instead?
>>> I am not sure. I believe, in some cases, the rest of the nodes which
>>> could be most of the nodes in the tree will not have the chance to be
>>> scheduled in this round.
>> 
>> My guess would be that it doesn't really matter, because in most cases
>> each schedule round will only actually end up queueing packets from one
>> or two stations; as the driver will pull multiple packets from that one
>> station which will often fill up the firmware queues (especially once 
>> we
>> start throttling that with the AQL stuff).
>> 
>> So I guess we can just skip TXQs that we've already seen this 
>> scheduling
>> round, and let the v_t compare determine transmit eligibility :)
>
> I am a little confused. So do you mean it is fine for you to skip the 
> TXQs we met in this round before and continue the loop until the end or 
> vt comparison failure?

Yeah. In most cases it won't make any difference; but it'll make sure we
visit all eligible TXQs in all cases, so we might as well do that if
we're tracking the scheduling round anyway.

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-12-13  9:58     ` Johannes Berg
@ 2019-12-17 15:20       ` Toke Høiland-Jørgensen
  -1 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-12-17 15:20 UTC (permalink / raw)
  To: Johannes Berg, Yibo Zhao, linux-wireless; +Cc: ath10k

Johannes Berg <johannes@sipsolutions.net> writes:

> I'm going to assume that Toke will review all of this and there will be
> changes, so you'd resend anyway ...

Yeah, this series doesn't even apply in its current form. I'll try to
fix that, and do a few other updates that are needed while I'm at it.
And to answer your question in the other email, yeah, this should
probably be squashed to a single patch...

-Toke


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-12-17 15:20       ` Toke Høiland-Jørgensen
  0 siblings, 0 replies; 86+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-12-17 15:20 UTC (permalink / raw)
  To: Johannes Berg, Yibo Zhao, linux-wireless; +Cc: ath10k

Johannes Berg <johannes@sipsolutions.net> writes:

> I'm going to assume that Toke will review all of this and there will be
> changes, so you'd resend anyway ...

Yeah, this series doesn't even apply in its current form. I'll try to
fix that, and do a few other updates that are needed while I'm at it.
And to answer your question in the other email, yeah, this should
probably be squashed to a single patch...

-Toke


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-12-13  7:19   ` Yibo Zhao
@ 2019-12-13  9:58     ` Johannes Berg
  -1 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-12-13  9:58 UTC (permalink / raw)
  To: Yibo Zhao, linux-wireless; +Cc: ath10k


I'm going to assume that Toke will review all of this and there will be
changes, so you'd resend anyway ...

> - * @airtime_weight: station weight for airtime fairness calculation purposes
> + * @airtime_weight: station per-AC weight for airtime fairness calculation
> + * purposes

If you do, please replace the "*<space>purposes" by "*<tab>purposes" :-)

(otherwise I can just fix that myself too, but ...)

johannes


^ permalink raw reply	[flat|nested] 86+ messages in thread

* Re: [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-12-13  9:58     ` Johannes Berg
  0 siblings, 0 replies; 86+ messages in thread
From: Johannes Berg @ 2019-12-13  9:58 UTC (permalink / raw)
  To: Yibo Zhao, linux-wireless; +Cc: ath10k


I'm going to assume that Toke will review all of this and there will be
changes, so you'd resend anyway ...

> - * @airtime_weight: station weight for airtime fairness calculation purposes
> + * @airtime_weight: station per-AC weight for airtime fairness calculation
> + * purposes

If you do, please replace the "*<space>purposes" by "*<tab>purposes" :-)

(otherwise I can just fix that myself too, but ...)

johannes


_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply	[flat|nested] 86+ messages in thread

* [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
  2019-12-13  7:19 [PATCH V4 0/4] Enable virtual time-based airtime scheduler support on ath10k Yibo Zhao
@ 2019-12-13  7:19   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-12-13  7:19 UTC (permalink / raw)
  To: linux-wireless; +Cc: ath10k, Yibo Zhao

Global airtime weight sum is updated only when txq is added/removed
from rbtree. If upper layer configures sta weight during high load,
airtime weight sum will not be updated since txq is most likely on the
tree. It could be a little late for upper layer to reconfigure sta weight
when txq is already in the rbtree. And thus, incorrect airtime weight sum
will lead to incorrect global virtual time calculation as well as overflow
of airtime weight sum during txq removal.

Hence, need to update airtime weight sum upon receiving event for
configuring sta weight once sta's txq is on the rbtree.

Besides, if airtime weight sum of ACs and sta weight is synced under the
same per AC lock protection, there can be a very short window causing
incorrect airtime weight sum calculation as below:

    active_txq_lock_VO                          .
    VO weight sum is synced			.
    sta airtime weight sum is synced		.
    active_txq_unlock_VO			.
    .						.
    active_txq_lock_VI    			.
    VI weight sum is synced			.
    sta airtime weight sum		active_txq_lock_BE
    active_txq_unlock_VI	      Remove txq and thus sum
    .				      is calculated with synced
    .				      sta airtime weight
    .					active_txq_unlock_BE

So introduce a per-AC station airtime weight that is synced together with
the per-AC weight sum under the same lock. The per-AC station airtime
weight is then used to calculate the weight sum.

Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
---
 net/mac80211/cfg.c         | 29 ++++++++++++++++++++++++++---
 net/mac80211/debugfs_sta.c |  2 +-
 net/mac80211/sta_info.c    |  9 ++++-----
 net/mac80211/sta_info.h    |  5 +++--
 net/mac80211/tx.c          |  4 ++--
 5 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d65aa01..298b61d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	int ret = 0;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 mask, set;
+	u32 mask, set, tid, ac, pre_weight;
+	struct txq_info *txqi;
 
 	sband = ieee80211_get_sband(sdata);
 	if (!sband)
@@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
-	if (params->airtime_weight)
-		sta->airtime_weight = params->airtime_weight;
+	if (params->airtime_weight) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			spin_lock_bh(&local->active_txq_lock[ac]);
+			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+				if (params->airtime_weight == sta->airtime_weight[ac] ||
+				    !sta->sta.txq[tid] ||
+				    ac != ieee80211_ac_from_tid(tid))
+					continue;
+
+				pre_weight = sta->airtime_weight[ac];
+				sta->airtime_weight[ac] =
+						params->airtime_weight;
+
+				txqi = to_txq_info(sta->sta.txq[tid]);
+				if (RB_EMPTY_NODE(&txqi->schedule_order))
+					continue;
+
+				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
+								params->airtime_weight -
+								pre_weight;
+			}
+			spin_unlock_bh(&local->active_txq_lock[ac]);
+		}
+	}
 
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 80028da..43a7e6a 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -223,7 +223,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 		"Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
 		rx_airtime,
 		tx_airtime,
-		sta->airtime_weight,
+		sta->airtime_weight[0],
 		v_t[0],
 		v_t[1],
 		v_t[2],
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index feac975..e599cf1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -384,11 +384,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
-	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
-
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime_weight[i] = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1850,11 +1849,11 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
 
-	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight[ac];
 
 	/* Round the calculation of global vt */
 	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
-	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight[ac];
 	ieee80211_resort_txq(&local->hw, txq);
 
 	spin_unlock_bh(&local->active_txq_lock[ac]);
@@ -2236,7 +2235,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 	}
 
 	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
-		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->airtime_weight = sta->airtime_weight[0];
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c1cac9..6e831ef 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -476,7 +476,8 @@ struct ieee80211_sta_rx_stats {
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @airtime: per-AC struct airtime_info describing airtime statistics for this
  *	station
- * @airtime_weight: station weight for airtime fairness calculation purposes
+ * @airtime_weight: station per-AC weight for airtime fairness calculation
+ * purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
  * @debugfs_dir: debug filesystem directory dentry
@@ -602,7 +603,7 @@ struct sta_info {
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	struct airtime_info airtime[IEEE80211_NUM_ACS];
-	u16 airtime_weight;
+	u16 airtime_weight[IEEE80211_NUM_ACS];
 
 	/*
 	 * Aggregation information, locked with lock.
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b40cf91..56dba27 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3735,7 +3735,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		local->airtime_weight_sum[ac] += sta->airtime_weight[ac];
 		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
 			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
 						   sta->airtime[ac].v_t);
@@ -3785,7 +3785,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] -= sta->airtime_weight;
+		local->airtime_weight_sum[ac] -= sta->airtime_weight[ac];
 	}
 
 	rb_erase_cached(&txqi->schedule_order,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 86+ messages in thread

* [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together
@ 2019-12-13  7:19   ` Yibo Zhao
  0 siblings, 0 replies; 86+ messages in thread
From: Yibo Zhao @ 2019-12-13  7:19 UTC (permalink / raw)
  To: linux-wireless; +Cc: Yibo Zhao, ath10k

Global airtime weight sum is updated only when txq is added/removed
from rbtree. If upper layer configures sta weight during high load,
airtime weight sum will not be updated since txq is most likely on the
tree. It could be a little late for upper layer to reconfigure sta weight
when txq is already in the rbtree. And thus, incorrect airtime weight sum
will lead to incorrect global virtual time calculation as well as overflow
of airtime weight sum during txq removal.

Hence, we need to update airtime weight sum upon receiving an event for
configuring sta weight once sta's txq is on the rbtree.

Besides, if airtime weight sum of ACs and sta weight is synced under the
same per AC lock protection, there can be a very short window causing
incorrect airtime weight sum calculation as below:

    active_txq_lock_VO                          .
    VO weight sum is synced			.
    sta airtime weight sum is synced		.
    active_txq_unlock_VO			.
    .						.
    active_txq_lock_VI    			.
    VI weight sum is synced			.
    sta airtime weight sum		active_txq_lock_BE
    active_txq_unlock_VI	      Remove txq and thus sum
    .				      is calculated with synced
    .				      sta airtime weight
    .					active_txq_unlock_BE

So introduce a per-AC station airtime weight which is synced together
with the per-AC weight sum. The per-AC station airtime weight is then
used to calculate the weight sum.

Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
---
 net/mac80211/cfg.c         | 29 ++++++++++++++++++++++++++---
 net/mac80211/debugfs_sta.c |  2 +-
 net/mac80211/sta_info.c    |  9 ++++-----
 net/mac80211/sta_info.h    |  5 +++--
 net/mac80211/tx.c          |  4 ++--
 5 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d65aa01..298b61d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1284,7 +1284,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	int ret = 0;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 mask, set;
+	u32 mask, set, tid, ac, pre_weight;
+	struct txq_info *txqi;
 
 	sband = ieee80211_get_sband(sdata);
 	if (!sband)
@@ -1452,8 +1453,30 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
-	if (params->airtime_weight)
-		sta->airtime_weight = params->airtime_weight;
+	if (params->airtime_weight) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+			spin_lock_bh(&local->active_txq_lock[ac]);
+			for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+				if (params->airtime_weight == sta->airtime_weight[ac] ||
+				    !sta->sta.txq[tid] ||
+				    ac != ieee80211_ac_from_tid(tid))
+					continue;
+
+				pre_weight = sta->airtime_weight[ac];
+				sta->airtime_weight[ac] =
+						params->airtime_weight;
+
+				txqi = to_txq_info(sta->sta.txq[tid]);
+				if (RB_EMPTY_NODE(&txqi->schedule_order))
+					continue;
+
+				local->airtime_weight_sum[ac] = local->airtime_weight_sum[ac] +
+								params->airtime_weight -
+								pre_weight;
+			}
+			spin_unlock_bh(&local->active_txq_lock[ac]);
+		}
+	}
 
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 80028da..43a7e6a 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -223,7 +223,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 		"Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
 		rx_airtime,
 		tx_airtime,
-		sta->airtime_weight,
+		sta->airtime_weight[0],
 		v_t[0],
 		v_t[1],
 		v_t[2],
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index feac975..e599cf1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -384,11 +384,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
-	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
-
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime_weight[i] = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1850,11 +1849,11 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 	sta->airtime[ac].tx_airtime += tx_airtime;
 	sta->airtime[ac].rx_airtime += rx_airtime;
 
-	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight;
+	weight_sum = local->airtime_weight_sum[ac] ?: sta->airtime_weight[ac];
 
 	/* Round the calculation of global vt */
 	local->airtime_v_t[ac] += (airtime + (weight_sum >> 1)) / weight_sum;
-	sta->airtime[ac].v_t += airtime / sta->airtime_weight;
+	sta->airtime[ac].v_t += airtime / sta->airtime_weight[ac];
 	ieee80211_resort_txq(&local->hw, txq);
 
 	spin_unlock_bh(&local->active_txq_lock[ac]);
@@ -2236,7 +2235,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 	}
 
 	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
-		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->airtime_weight = sta->airtime_weight[0];
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c1cac9..6e831ef 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -476,7 +476,8 @@ struct ieee80211_sta_rx_stats {
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @airtime: per-AC struct airtime_info describing airtime statistics for this
  *	station
- * @airtime_weight: station weight for airtime fairness calculation purposes
+ * @airtime_weight: station per-AC weight for airtime fairness calculation
+ * purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
  * @debugfs_dir: debug filesystem directory dentry
@@ -602,7 +603,7 @@ struct sta_info {
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	struct airtime_info airtime[IEEE80211_NUM_ACS];
-	u16 airtime_weight;
+	u16 airtime_weight[IEEE80211_NUM_ACS];
 
 	/*
 	 * Aggregation information, locked with lock.
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b40cf91..56dba27 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3735,7 +3735,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] += sta->airtime_weight;
+		local->airtime_weight_sum[ac] += sta->airtime_weight[ac];
 		if (local->airtime_v_t[ac] > AIRTIME_GRACE)
 			sta->airtime[ac].v_t = max(local->airtime_v_t[ac] - AIRTIME_GRACE,
 						   sta->airtime[ac].v_t);
@@ -3785,7 +3785,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 		struct sta_info *sta = container_of(txq->sta,
 						    struct sta_info, sta);
 
-		local->airtime_weight_sum[ac] -= sta->airtime_weight;
+		local->airtime_weight_sum[ac] -= sta->airtime_weight[ac];
 	}
 
 	rb_erase_cached(&txqi->schedule_order,
-- 
1.9.1

_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k

^ permalink raw reply related	[flat|nested] 86+ messages in thread

end of thread, other threads:[~2019-12-17 15:20 UTC | newest]

Thread overview: 86+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-16 13:09 [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler Yibo Zhao
2019-09-16 13:09 ` Yibo Zhao
2019-09-16 13:09 ` [PATCH 2/4] mac80211: defer txqs removal from rbtree Yibo Zhao
2019-09-16 13:09   ` Yibo Zhao
2019-09-17 21:10   ` Toke Høiland-Jørgensen
2019-09-17 21:10     ` Toke Høiland-Jørgensen
2019-09-18 10:27     ` Yibo Zhao
2019-09-18 10:27       ` Yibo Zhao
2019-09-18 11:23       ` Toke Høiland-Jørgensen
2019-09-18 11:23         ` Toke Høiland-Jørgensen
2019-09-19  9:56         ` Yibo Zhao
2019-09-19  9:56           ` Yibo Zhao
2019-09-19 10:37           ` Toke Høiland-Jørgensen
2019-09-19 10:37             ` Toke Høiland-Jørgensen
2019-09-20  8:29             ` Yibo Zhao
2019-09-20  8:29               ` Yibo Zhao
2019-09-20  9:15               ` Toke Høiland-Jørgensen
2019-09-20  9:15                 ` Toke Høiland-Jørgensen
2019-09-21 10:49                 ` Yibo Zhao
2019-09-21 10:49                   ` Yibo Zhao
2019-09-21 11:27                   ` Toke Høiland-Jørgensen
2019-09-21 11:27                     ` Toke Høiland-Jørgensen
2019-09-21 11:53                     ` Yibo Zhao
2019-09-21 11:53                       ` Yibo Zhao
2019-09-21 12:22                     ` Yibo Zhao
2019-09-21 12:22                       ` Yibo Zhao
2019-09-21 13:02                       ` Toke Høiland-Jørgensen
2019-09-21 13:02                         ` Toke Høiland-Jørgensen
2019-09-21 13:24                         ` Yibo Zhao
2019-09-21 13:24                           ` Yibo Zhao
2019-09-21 14:00                           ` Toke Høiland-Jørgensen
2019-09-21 14:00                             ` Toke Høiland-Jørgensen
2019-09-22  5:19                             ` Yibo Zhao
2019-09-22  5:19                               ` Yibo Zhao
2019-09-23 10:47                               ` Toke Høiland-Jørgensen
2019-09-23 10:47                                 ` Toke Høiland-Jørgensen
2019-09-23 11:42                                 ` Kalle Valo
2019-09-23 11:42                                   ` Kalle Valo
2019-09-23 16:39                                   ` Toke Høiland-Jørgensen
2019-09-23 16:39                                     ` Toke Høiland-Jørgensen
2019-09-24  5:27                                     ` Kalle Valo
2019-09-24  5:27                                       ` Kalle Valo
2019-09-24  7:23                                       ` Toke Høiland-Jørgensen
2019-09-24  7:23                                         ` Toke Høiland-Jørgensen
2019-09-24  2:45                                 ` Yibo Zhao
2019-09-24  2:45                                   ` Yibo Zhao
2019-09-24  7:26                                   ` Toke Høiland-Jørgensen
2019-09-24  7:26                                     ` Toke Høiland-Jørgensen
2019-09-24  8:31                                     ` Yibo Zhao
2019-09-24  8:31                                       ` Yibo Zhao
2019-09-24  8:44                                       ` Toke Høiland-Jørgensen
2019-09-24  8:44                                         ` Toke Høiland-Jørgensen
2019-09-16 13:09 ` [PATCH 3/4] mac80211: fix low throughput in push pull mode Yibo Zhao
2019-09-16 13:09   ` Yibo Zhao
2019-09-16 15:27   ` Johannes Berg
2019-09-16 15:27     ` Johannes Berg
2019-09-17  6:36     ` Yibo Zhao
2019-09-17  6:36       ` Yibo Zhao
2019-09-17  6:55       ` Johannes Berg
2019-09-17  6:55         ` Johannes Berg
2019-09-17 21:12       ` Toke Høiland-Jørgensen
2019-09-17 21:12         ` Toke Høiland-Jørgensen
2019-09-18 10:02         ` Yibo Zhao
2019-09-18 10:02           ` Yibo Zhao
2019-09-18 10:16           ` Toke Høiland-Jørgensen
2019-09-18 10:16             ` Toke Høiland-Jørgensen
2019-09-18 10:18             ` Yibo Zhao
2019-09-18 10:18               ` Yibo Zhao
2019-09-16 13:09 ` [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together Yibo Zhao
2019-09-16 13:09   ` Yibo Zhao
2019-09-17 21:24   ` Toke Høiland-Jørgensen
2019-09-17 21:24     ` Toke Høiland-Jørgensen
2019-09-18 10:16     ` Yibo Zhao
2019-09-18 10:16       ` Yibo Zhao
2019-09-16 14:51 ` [PATCH 1/4] mac80211: Switch to a virtual time-based airtime scheduler Toke Høiland-Jørgensen
2019-09-16 14:51   ` Toke Høiland-Jørgensen
2019-09-17 21:31 ` Toke Høiland-Jørgensen
2019-09-17 21:31   ` Toke Høiland-Jørgensen
2019-09-20  8:37   ` Yibo Zhao
2019-09-20  8:37     ` Yibo Zhao
2019-12-13  7:19 [PATCH V4 0/4] Enable virtual time-based airtime scheduler support on ath10k Yibo Zhao
2019-12-13  7:19 ` [PATCH 4/4] mac80211: Sync airtime weight sum with per AC synced sta airtime weight together Yibo Zhao
2019-12-13  7:19   ` Yibo Zhao
2019-12-13  9:58   ` Johannes Berg
2019-12-13  9:58     ` Johannes Berg
2019-12-17 15:20     ` Toke Høiland-Jørgensen
2019-12-17 15:20       ` Toke Høiland-Jørgensen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.