linux-kernel.vger.kernel.org archive mirror
* [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
@ 2018-11-30 17:59 Brian Norris
  2018-11-30 19:04 ` [EXT] " Ganapathi Bhat
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Brian Norris @ 2018-11-30 17:59 UTC (permalink / raw)
  To: Kalle Valo
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam,
	Ganapathi Bhat, Xinming Hu, linux-wireless, Brian Norris, stable

This reverts commit 5188d5453bc9380ccd4ae1086138dd485d13aef2, because it
introduced lock recursion:

  BUG: spinlock recursion on CPU#2, kworker/u13:1/395
   lock: 0xffffffc0e28a47f0, .magic: dead4ead, .owner: kworker/u13:1/395, .owner_cpu: 2
  CPU: 2 PID: 395 Comm: kworker/u13:1 Not tainted 4.20.0-rc4+ #2
  Hardware name: Google Kevin (DT)
  Workqueue: MWIFIEX_RX_WORK_QUEUE mwifiex_rx_work_queue [mwifiex]
  Call trace:
   dump_backtrace+0x0/0x140
   show_stack+0x20/0x28
   dump_stack+0x84/0xa4
   spin_bug+0x98/0xa4
   do_raw_spin_lock+0x5c/0xdc
   _raw_spin_lock_irqsave+0x38/0x48
   mwifiex_flush_data+0x2c/0xa4 [mwifiex]
   call_timer_fn+0xcc/0x1c4
   run_timer_softirq+0x264/0x4f0
   __do_softirq+0x1a8/0x35c
   do_softirq+0x54/0x64
   netif_rx_ni+0xe8/0x120
   mwifiex_recv_packet+0xfc/0x10c [mwifiex]
   mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex]
   mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex]
   mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex]
   mwifiex_process_sta_rx_packet+0x204/0x26c [mwifiex]
   mwifiex_handle_rx_packet+0x15c/0x16c [mwifiex]
   mwifiex_rx_work_queue+0x104/0x134 [mwifiex]
   worker_thread+0x4cc/0x72c
   kthread+0x134/0x13c
   ret_from_fork+0x10/0x18

This was clearly not tested well at all. I simply performed 'wget' in a
loop and it fell over within a few seconds.

Fixes: 5188d5453bc9 ("mwifiex: restructure rx_reorder_tbl_lock usage")
Cc: <stable@vger.kernel.org>
Cc: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Brian Norris <briannorris@chromium.org>
---
This is a 4.19 regression, but IMO the revert should be merged ASAP.

 drivers/net/wireless/marvell/mwifiex/11n.c    |  5 +-
 .../wireless/marvell/mwifiex/11n_rxreorder.c  | 96 ++++++++++---------
 .../net/wireless/marvell/mwifiex/uap_txrx.c   |  3 -
 3 files changed, 51 insertions(+), 53 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index e2addd8b878b..5d75c971004b 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -696,11 +696,10 @@ void mwifiex_11n_delba(struct mwifiex_private *priv, int tid)
 				"Send delba to tid=%d, %pM\n",
 				tid, rx_reor_tbl_ptr->ta);
 			mwifiex_send_delba(priv, tid, rx_reor_tbl_ptr->ta, 0);
-			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
-					       flags);
-			return;
+			goto exit;
 		}
 	}
+exit:
 	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 }
 
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index 8e63d14c1e1c..5380fba652cc 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -103,8 +103,6 @@ static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload)
  * There could be holes in the buffer, which are skipped by the function.
  * Since the buffer is linear, the function uses rotation to simulate
  * circular buffer.
- *
- * The caller must hold rx_reorder_tbl_lock spinlock.
  */
 static void
 mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv,
@@ -113,21 +111,25 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv,
 {
 	int pkt_to_send, i;
 	void *rx_tmp_ptr;
+	unsigned long flags;
 
 	pkt_to_send = (start_win > tbl->start_win) ?
 		      min((start_win - tbl->start_win), tbl->win_size) :
 		      tbl->win_size;
 
 	for (i = 0; i < pkt_to_send; ++i) {
+		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 		rx_tmp_ptr = NULL;
 		if (tbl->rx_reorder_ptr[i]) {
 			rx_tmp_ptr = tbl->rx_reorder_ptr[i];
 			tbl->rx_reorder_ptr[i] = NULL;
 		}
+		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		if (rx_tmp_ptr)
 			mwifiex_11n_dispatch_pkt(priv, rx_tmp_ptr);
 	}
 
+	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	/*
 	 * We don't have a circular buffer, hence use rotation to simulate
 	 * circular buffer
@@ -138,6 +140,7 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv,
 	}
 
 	tbl->start_win = start_win;
+	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 }
 
 /*
@@ -147,8 +150,6 @@ mwifiex_11n_dispatch_pkt_until_start_win(struct mwifiex_private *priv,
  * The start window is adjusted automatically when a hole is located.
  * Since the buffer is linear, the function uses rotation to simulate
  * circular buffer.
- *
- * The caller must hold rx_reorder_tbl_lock spinlock.
  */
 static void
 mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv,
@@ -156,15 +157,22 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv,
 {
 	int i, j, xchg;
 	void *rx_tmp_ptr;
+	unsigned long flags;
 
 	for (i = 0; i < tbl->win_size; ++i) {
-		if (!tbl->rx_reorder_ptr[i])
+		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
+		if (!tbl->rx_reorder_ptr[i]) {
+			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
+					       flags);
 			break;
+		}
 		rx_tmp_ptr = tbl->rx_reorder_ptr[i];
 		tbl->rx_reorder_ptr[i] = NULL;
+		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		mwifiex_11n_dispatch_pkt(priv, rx_tmp_ptr);
 	}
 
+	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	/*
 	 * We don't have a circular buffer, hence use rotation to simulate
 	 * circular buffer
@@ -177,6 +185,7 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv,
 		}
 	}
 	tbl->start_win = (tbl->start_win + i) & (MAX_TID_VALUE - 1);
+	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 }
 
 /*
@@ -184,8 +193,6 @@ mwifiex_11n_scan_and_dispatch(struct mwifiex_private *priv,
  *
  * The function stops the associated timer and dispatches all the
  * pending packets in the Rx reorder table before deletion.
- *
- * The caller must hold rx_reorder_tbl_lock spinlock.
  */
 static void
 mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv,
@@ -211,7 +218,11 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv,
 
 	del_timer_sync(&tbl->timer_context.timer);
 	tbl->timer_context.timer_is_set = false;
+
+	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	list_del(&tbl->list);
+	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
+
 	kfree(tbl->rx_reorder_ptr);
 	kfree(tbl);
 
@@ -224,17 +235,22 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv,
 /*
  * This function returns the pointer to an entry in Rx reordering
  * table which matches the given TA/TID pair.
- *
- * The caller must hold rx_reorder_tbl_lock spinlock.
  */
 struct mwifiex_rx_reorder_tbl *
 mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta)
 {
 	struct mwifiex_rx_reorder_tbl *tbl;
+	unsigned long flags;
 
-	list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list)
-		if (!memcmp(tbl->ta, ta, ETH_ALEN) && tbl->tid == tid)
+	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
+	list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list) {
+		if (!memcmp(tbl->ta, ta, ETH_ALEN) && tbl->tid == tid) {
+			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
+					       flags);
 			return tbl;
+		}
+	}
+	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 	return NULL;
 }
@@ -251,9 +267,14 @@ void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta)
 		return;
 
 	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
-	list_for_each_entry_safe(tbl, tmp, &priv->rx_reorder_tbl_ptr, list)
-		if (!memcmp(tbl->ta, ta, ETH_ALEN))
+	list_for_each_entry_safe(tbl, tmp, &priv->rx_reorder_tbl_ptr, list) {
+		if (!memcmp(tbl->ta, ta, ETH_ALEN)) {
+			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
+					       flags);
 			mwifiex_del_rx_reorder_entry(priv, tbl);
+			spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
+		}
+	}
 	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 	return;
@@ -262,18 +283,24 @@ void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta)
 /*
  * This function finds the last sequence number used in the packets
  * buffered in Rx reordering table.
- *
- * The caller must hold rx_reorder_tbl_lock spinlock.
  */
 static int
 mwifiex_11n_find_last_seq_num(struct reorder_tmr_cnxt *ctx)
 {
 	struct mwifiex_rx_reorder_tbl *rx_reorder_tbl_ptr = ctx->ptr;
+	struct mwifiex_private *priv = ctx->priv;
+	unsigned long flags;
 	int i;
 
-	for (i = rx_reorder_tbl_ptr->win_size - 1; i >= 0; --i)
-		if (rx_reorder_tbl_ptr->rx_reorder_ptr[i])
+	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
+	for (i = rx_reorder_tbl_ptr->win_size - 1; i >= 0; --i) {
+		if (rx_reorder_tbl_ptr->rx_reorder_ptr[i]) {
+			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
+					       flags);
 			return i;
+		}
+	}
+	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 	return -1;
 }
@@ -291,22 +318,17 @@ mwifiex_flush_data(struct timer_list *t)
 	struct reorder_tmr_cnxt *ctx =
 		from_timer(ctx, t, timer);
 	int start_win, seq_num;
-	unsigned long flags;
 
 	ctx->timer_is_set = false;
-	spin_lock_irqsave(&ctx->priv->rx_reorder_tbl_lock, flags);
 	seq_num = mwifiex_11n_find_last_seq_num(ctx);
 
-	if (seq_num < 0) {
-		spin_unlock_irqrestore(&ctx->priv->rx_reorder_tbl_lock, flags);
+	if (seq_num < 0)
 		return;
-	}
 
 	mwifiex_dbg(ctx->priv->adapter, INFO, "info: flush data %d\n", seq_num);
 	start_win = (ctx->ptr->start_win + seq_num + 1) & (MAX_TID_VALUE - 1);
 	mwifiex_11n_dispatch_pkt_until_start_win(ctx->priv, ctx->ptr,
 						 start_win);
-	spin_unlock_irqrestore(&ctx->priv->rx_reorder_tbl_lock, flags);
 }
 
 /*
@@ -333,14 +355,11 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta,
 	 * If we get a TID, ta pair which is already present dispatch all the
 	 * the packets and move the window size until the ssn
 	 */
-	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, ta);
 	if (tbl) {
 		mwifiex_11n_dispatch_pkt_until_start_win(priv, tbl, seq_num);
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		return;
 	}
-	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 	/* if !tbl then create one */
 	new_node = kzalloc(sizeof(struct mwifiex_rx_reorder_tbl), GFP_KERNEL);
 	if (!new_node)
@@ -551,20 +570,16 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv,
 	int prev_start_win, start_win, end_win, win_size;
 	u16 pkt_index;
 	bool init_window_shift = false;
-	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, ta);
 	if (!tbl) {
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		if (pkt_type != PKT_TYPE_BAR)
 			mwifiex_11n_dispatch_pkt(priv, payload);
 		return ret;
 	}
 
 	if ((pkt_type == PKT_TYPE_AMSDU) && !tbl->amsdu) {
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		mwifiex_11n_dispatch_pkt(priv, payload);
 		return ret;
 	}
@@ -651,8 +666,6 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv,
 	if (!tbl->timer_context.timer_is_set ||
 	    prev_start_win != tbl->start_win)
 		mwifiex_11n_rxreorder_timer_restart(tbl);
-
-	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 	return ret;
 }
 
@@ -681,18 +694,14 @@ mwifiex_del_ba_tbl(struct mwifiex_private *priv, int tid, u8 *peer_mac,
 		    peer_mac, tid, initiator);
 
 	if (cleanup_rx_reorder_tbl) {
-		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 		tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid,
 								 peer_mac);
 		if (!tbl) {
-			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
-					       flags);
 			mwifiex_dbg(priv->adapter, EVENT,
 				    "event: TID, TA not found in table\n");
 			return;
 		}
 		mwifiex_del_rx_reorder_entry(priv, tbl);
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 	} else {
 		ptx_tbl = mwifiex_get_ba_tbl(priv, tid, peer_mac);
 		if (!ptx_tbl) {
@@ -726,7 +735,6 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv,
 	int tid, win_size;
 	struct mwifiex_rx_reorder_tbl *tbl;
 	uint16_t block_ack_param_set;
-	unsigned long flags;
 
 	block_ack_param_set = le16_to_cpu(add_ba_rsp->block_ack_param_set);
 
@@ -740,20 +748,17 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv,
 		mwifiex_dbg(priv->adapter, ERROR, "ADDBA RSP: failed %pM tid=%d)\n",
 			    add_ba_rsp->peer_mac_addr, tid);
 
-		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 		tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid,
 						     add_ba_rsp->peer_mac_addr);
 		if (tbl)
 			mwifiex_del_rx_reorder_entry(priv, tbl);
 
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		return 0;
 	}
 
 	win_size = (block_ack_param_set & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK)
 		    >> BLOCKACKPARAM_WINSIZE_POS;
 
-	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid,
 					     add_ba_rsp->peer_mac_addr);
 	if (tbl) {
@@ -764,7 +769,6 @@ int mwifiex_ret_11n_addba_resp(struct mwifiex_private *priv,
 		else
 			tbl->amsdu = false;
 	}
-	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 	mwifiex_dbg(priv->adapter, CMD,
 		    "cmd: ADDBA RSP: %pM tid=%d ssn=%d win_size=%d\n",
@@ -804,8 +808,11 @@ void mwifiex_11n_cleanup_reorder_tbl(struct mwifiex_private *priv)
 
 	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	list_for_each_entry_safe(del_tbl_ptr, tmp_node,
-				 &priv->rx_reorder_tbl_ptr, list)
+				 &priv->rx_reorder_tbl_ptr, list) {
+		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		mwifiex_del_rx_reorder_entry(priv, del_tbl_ptr);
+		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
+	}
 	INIT_LIST_HEAD(&priv->rx_reorder_tbl_ptr);
 	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
@@ -929,7 +936,6 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 	int tlv_buf_left = len;
 	int ret;
 	u8 *tmp;
-	unsigned long flags;
 
 	mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:",
 			 event_buf, len);
@@ -949,18 +955,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 			    tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num,
 			    tlv_bitmap_len);
 
-		spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 		rx_reor_tbl_ptr =
 			mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid,
 						       tlv_rxba->mac);
 		if (!rx_reor_tbl_ptr) {
-			spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
-					       flags);
 			mwifiex_dbg(priv->adapter, ERROR,
 				    "Can not find rx_reorder_tbl!");
 			return;
 		}
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 		for (i = 0; i < tlv_bitmap_len; i++) {
 			for (j = 0 ; j < 8; j++) {
diff --git a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
index a83c5afc256a..5ce85d5727e4 100644
--- a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
+++ b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
@@ -421,15 +421,12 @@ int mwifiex_process_uap_rx_packet(struct mwifiex_private *priv,
 		spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
 	}
 
-	spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
 	if (!priv->ap_11n_enabled ||
 	    (!mwifiex_11n_get_rx_reorder_tbl(priv, uap_rx_pd->priority, ta) &&
 	    (le16_to_cpu(uap_rx_pd->rx_pkt_type) != PKT_TYPE_AMSDU))) {
 		ret = mwifiex_handle_uap_rx_forward(priv, skb);
-		spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 		return ret;
 	}
-	spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);
 
 	/* Reorder and send to kernel */
 	pkt_type = (u8)le16_to_cpu(uap_rx_pd->rx_pkt_type);
-- 
2.20.0.rc1.387.gf8505762e3-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [EXT] [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2018-11-30 17:59 [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage" Brian Norris
@ 2018-11-30 19:04 ` Ganapathi Bhat
  2018-12-01  2:27   ` Brian Norris
  2018-12-03 12:37 ` Kalle Valo
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Ganapathi Bhat @ 2018-11-30 19:04 UTC (permalink / raw)
  To: Brian Norris, Kalle Valo
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam, Xinming Hu,
	linux-wireless, stable

Hi Brian,
> 
> This was clearly not tested well at all. I simply performed 'wget' in a loop and
> it fell over within a few seconds.
Sorry. I had run an iperf test before sharing the patch (no regression observed).
It looks like I failed to get this right a second time; I will check this.

Regards,
Ganapathi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [EXT] [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2018-11-30 19:04 ` [EXT] " Ganapathi Bhat
@ 2018-12-01  2:27   ` Brian Norris
  0 siblings, 0 replies; 8+ messages in thread
From: Brian Norris @ 2018-12-01  2:27 UTC (permalink / raw)
  To: Ganapathi Bhat
  Cc: Kalle Valo, linux-kernel, Amitkumar Karwar, Nishant Sarmukadam,
	Xinming Hu, linux-wireless, stable

On Fri, Nov 30, 2018 at 07:04:49PM +0000, Ganapathi Bhat wrote:
> > This was clearly not tested well at all. I simply performed 'wget' in a loop and
> > it fell over within a few seconds.
> Sorry. I had run an iperf test before sharing the patch (no regression observed).
> It looks like I failed to get this right a second time; I will check this.

Were you running on an 11n/ac network? IIUC, the particular code that's
being blamed is only active with 11n+ features.

You might also make sure you test with stuff like lockdep enabled, as
that gives nice warnings and can even preemptively warn about potential
problems before they even occur, if you use the right settings. (Some
lockdep configurations can slow things down significantly, BTW.)
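
For illustration, a debug config fragment along these lines is usually
enough to catch this class of bug (exact option names and dependencies
vary a bit by kernel version, so treat this as a starting point only):

  # Illustrative only; adjust for your kernel version and architecture.
  CONFIG_DEBUG_SPINLOCK=y
  CONFIG_DEBUG_LOCK_ALLOC=y
  CONFIG_PROVE_LOCKING=y
  CONFIG_DEBUG_ATOMIC_SLEEP=y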

Brian

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2018-11-30 17:59 [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage" Brian Norris
  2018-11-30 19:04 ` [EXT] " Ganapathi Bhat
@ 2018-12-03 12:37 ` Kalle Valo
  2018-12-13 13:54 ` Kalle Valo
  2019-03-08  2:34 ` Brian Norris
  3 siblings, 0 replies; 8+ messages in thread
From: Kalle Valo @ 2018-12-03 12:37 UTC (permalink / raw)
  To: Brian Norris
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam,
	Ganapathi Bhat, Xinming Hu, linux-wireless, stable

Brian Norris <briannorris@chromium.org> writes:

> This reverts commit 5188d5453bc9380ccd4ae1086138dd485d13aef2, because it
> introduced lock recursion:
>
>   BUG: spinlock recursion on CPU#2, kworker/u13:1/395
>    lock: 0xffffffc0e28a47f0, .magic: dead4ead, .owner: kworker/u13:1/395, .owner_cpu: 2
>   CPU: 2 PID: 395 Comm: kworker/u13:1 Not tainted 4.20.0-rc4+ #2
>   Hardware name: Google Kevin (DT)
>   Workqueue: MWIFIEX_RX_WORK_QUEUE mwifiex_rx_work_queue [mwifiex]
>   Call trace:
>    dump_backtrace+0x0/0x140
>    show_stack+0x20/0x28
>    dump_stack+0x84/0xa4
>    spin_bug+0x98/0xa4
>    do_raw_spin_lock+0x5c/0xdc
>    _raw_spin_lock_irqsave+0x38/0x48
>    mwifiex_flush_data+0x2c/0xa4 [mwifiex]
>    call_timer_fn+0xcc/0x1c4
>    run_timer_softirq+0x264/0x4f0
>    __do_softirq+0x1a8/0x35c
>    do_softirq+0x54/0x64
>    netif_rx_ni+0xe8/0x120
>    mwifiex_recv_packet+0xfc/0x10c [mwifiex]
>    mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex]
>    mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex]
>    mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex]
>    mwifiex_process_sta_rx_packet+0x204/0x26c [mwifiex]
>    mwifiex_handle_rx_packet+0x15c/0x16c [mwifiex]
>    mwifiex_rx_work_queue+0x104/0x134 [mwifiex]
>    worker_thread+0x4cc/0x72c
>    kthread+0x134/0x13c
>    ret_from_fork+0x10/0x18
>
> This was clearly not tested well at all. I simply performed 'wget' in a
> loop and it fell over within a few seconds.
>
> Fixes: 5188d5453bc9 ("mwifiex: restructure rx_reorder_tbl_lock usage")
> Cc: <stable@vger.kernel.org>
> Cc: Ganapathi Bhat <gbhat@marvell.com>
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
> This is a 4.19 regression, but IMO the revert should be merged ASAP.

Ok, I'll queue this for 4.20.

-- 
Kalle Valo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2018-11-30 17:59 [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage" Brian Norris
  2018-11-30 19:04 ` [EXT] " Ganapathi Bhat
  2018-12-03 12:37 ` Kalle Valo
@ 2018-12-13 13:54 ` Kalle Valo
  2019-03-08  2:34 ` Brian Norris
  3 siblings, 0 replies; 8+ messages in thread
From: Kalle Valo @ 2018-12-13 13:54 UTC (permalink / raw)
  To: Brian Norris
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam,
	Ganapathi Bhat, Xinming Hu, linux-wireless, Brian Norris, stable

Brian Norris <briannorris@chromium.org> wrote:

> This reverts commit 5188d5453bc9380ccd4ae1086138dd485d13aef2, because it
> introduced lock recursion:
> 
>   BUG: spinlock recursion on CPU#2, kworker/u13:1/395
>    lock: 0xffffffc0e28a47f0, .magic: dead4ead, .owner: kworker/u13:1/395, .owner_cpu: 2
>   CPU: 2 PID: 395 Comm: kworker/u13:1 Not tainted 4.20.0-rc4+ #2
>   Hardware name: Google Kevin (DT)
>   Workqueue: MWIFIEX_RX_WORK_QUEUE mwifiex_rx_work_queue [mwifiex]
>   Call trace:
>    dump_backtrace+0x0/0x140
>    show_stack+0x20/0x28
>    dump_stack+0x84/0xa4
>    spin_bug+0x98/0xa4
>    do_raw_spin_lock+0x5c/0xdc
>    _raw_spin_lock_irqsave+0x38/0x48
>    mwifiex_flush_data+0x2c/0xa4 [mwifiex]
>    call_timer_fn+0xcc/0x1c4
>    run_timer_softirq+0x264/0x4f0
>    __do_softirq+0x1a8/0x35c
>    do_softirq+0x54/0x64
>    netif_rx_ni+0xe8/0x120
>    mwifiex_recv_packet+0xfc/0x10c [mwifiex]
>    mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex]
>    mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex]
>    mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex]
>    mwifiex_process_sta_rx_packet+0x204/0x26c [mwifiex]
>    mwifiex_handle_rx_packet+0x15c/0x16c [mwifiex]
>    mwifiex_rx_work_queue+0x104/0x134 [mwifiex]
>    worker_thread+0x4cc/0x72c
>    kthread+0x134/0x13c
>    ret_from_fork+0x10/0x18
> 
> This was clearly not tested well at all. I simply performed 'wget' in a
> loop and it fell over within a few seconds.
> 
> Fixes: 5188d5453bc9 ("mwifiex: restructure rx_reorder_tbl_lock usage")
> Cc: <stable@vger.kernel.org>
> Cc: Ganapathi Bhat <gbhat@marvell.com>
> Signed-off-by: Brian Norris <briannorris@chromium.org>

Patch applied to wireless-drivers.git, thanks.

1aa48f088615 Revert "mwifiex: restructure rx_reorder_tbl_lock usage"

-- 
https://patchwork.kernel.org/patch/10706959/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2018-11-30 17:59 [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage" Brian Norris
                   ` (2 preceding siblings ...)
  2018-12-13 13:54 ` Kalle Valo
@ 2019-03-08  2:34 ` Brian Norris
  2019-06-04  3:03   ` [EXT] " Ganapathi Bhat
  3 siblings, 1 reply; 8+ messages in thread
From: Brian Norris @ 2019-03-08  2:34 UTC (permalink / raw)
  To: Ganapathi Bhat
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam,
	Ganapathi Bhat, Xinming Hu, linux-wireless, stable

Hi again Ganapathi,

By the way, I was a little curious about what went wrong here, so I dug
in a little further:

On Fri, Nov 30, 2018 at 09:59:57AM -0800, Brian Norris wrote:
> This reverts commit 5188d5453bc9380ccd4ae1086138dd485d13aef2, because it
> introduced lock recursion:
> 
>   BUG: spinlock recursion on CPU#2, kworker/u13:1/395
>    lock: 0xffffffc0e28a47f0, .magic: dead4ead, .owner: kworker/u13:1/395, .owner_cpu: 2
>   CPU: 2 PID: 395 Comm: kworker/u13:1 Not tainted 4.20.0-rc4+ #2
>   Hardware name: Google Kevin (DT)
>   Workqueue: MWIFIEX_RX_WORK_QUEUE mwifiex_rx_work_queue [mwifiex]
>   Call trace:
>    dump_backtrace+0x0/0x140
>    show_stack+0x20/0x28
>    dump_stack+0x84/0xa4
>    spin_bug+0x98/0xa4
>    do_raw_spin_lock+0x5c/0xdc
>    _raw_spin_lock_irqsave+0x38/0x48
>    mwifiex_flush_data+0x2c/0xa4 [mwifiex]
>    call_timer_fn+0xcc/0x1c4
>    run_timer_softirq+0x264/0x4f0
>    __do_softirq+0x1a8/0x35c
>    do_softirq+0x54/0x64
>    netif_rx_ni+0xe8/0x120
>    mwifiex_recv_packet+0xfc/0x10c [mwifiex]
>    mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex]
>    mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex]
>    mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex]

TL;DR: the problem was right here ^^^
where you started running mwifiex_11n_dispatch_pkt() (via
mwifiex_11n_scan_and_dispatch()) while holding a spinlock.

When you do that, you eventually call netif_rx_ni(), which specifically
defers to softirq contexts. Then, if you happen to have your flush timer
expiring just before that, you end up in mwifiex_flush_data(), which
also needs that spinlock.
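
To make the sequence concrete, here is a condensed sketch of the call
path, annotated by hand from the trace above (illustration only, not
real code):

  mwifiex_11n_rx_reorder_pkt()
    spin_lock_irqsave(&priv->rx_reorder_tbl_lock, ...)   <-- lock taken
    mwifiex_11n_scan_and_dispatch()
      mwifiex_11n_dispatch_pkt()
        mwifiex_recv_packet()
          netif_rx_ni()            <-- may run pending softirqs inline
            run_timer_softirq()    <-- flush timer happens to expire here
              mwifiex_flush_data()
                spin_lock_irqsave(&priv->rx_reorder_tbl_lock, ...)
                                   <-- same lock, same CPU: recursion BUG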

There are a few possible ways to handle this:
(a) prevent processing softirqs in that context; e.g., with
    local_bh_disable(). This seems somewhat of a hack.
    (Side note: I think most of the locks in this driver really could be
    spin_lock_bh(), not spin_lock_irqsave() -- we don't really care
    about hardirq context for 99% of these locks.)
(b) restructure so that packet processing (e.g., netif_rx_ni()) is done
    outside of the spinlock.

It's actually not that hard to do (b). You can just queue your skb's up
in a temporary sk_buff_head list and process them all at once after
you've finished processing the reorder table. I have a local patch to do
this, and I might send it your way if I can give it a bit more testing.
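
Something along these lines -- purely illustrative, with a made-up helper
name, and not the actual patch I mentioned above:

/*
 * Illustrative sketch only, not the real mwifiex patch: collect the skbs
 * while holding the lock, then hand them to the stack after dropping it,
 * so netif_rx_ni() never runs under rx_reorder_tbl_lock.
 */
static void dispatch_reorder_window(struct mwifiex_private *priv,
                                    struct mwifiex_rx_reorder_tbl *tbl,
                                    int pkt_to_send)
{
        struct sk_buff_head list;
        struct sk_buff *skb;
        unsigned long flags;
        int i;

        __skb_queue_head_init(&list);

        spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
        for (i = 0; i < pkt_to_send; i++) {
                if (tbl->rx_reorder_ptr[i]) {
                        __skb_queue_tail(&list, tbl->rx_reorder_ptr[i]);
                        tbl->rx_reorder_ptr[i] = NULL;
                }
        }
        spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags);

        /* Lock dropped: now it's safe to push packets up the stack. */
        while ((skb = __skb_dequeue(&list)) != NULL)
                mwifiex_11n_dispatch_pkt(priv, skb);
}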

Brian

>    mwifiex_process_sta_rx_packet+0x204/0x26c [mwifiex]
>    mwifiex_handle_rx_packet+0x15c/0x16c [mwifiex]
>    mwifiex_rx_work_queue+0x104/0x134 [mwifiex]
>    worker_thread+0x4cc/0x72c
>    kthread+0x134/0x13c
>    ret_from_fork+0x10/0x18
> 
> This was clearly not tested well at all. I simply performed 'wget' in a
> loop and it fell over within a few seconds.
> 
> Fixes: 5188d5453bc9 ("mwifiex: restructure rx_reorder_tbl_lock usage")
> Cc: <stable@vger.kernel.org>
> Cc: Ganapathi Bhat <gbhat@marvell.com>
> Signed-off-by: Brian Norris <briannorris@chromium.org>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [EXT] Re: [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2019-03-08  2:34 ` Brian Norris
@ 2019-06-04  3:03   ` Ganapathi Bhat
  2019-06-04 20:57     ` Brian Norris
  0 siblings, 1 reply; 8+ messages in thread
From: Ganapathi Bhat @ 2019-06-04  3:03 UTC (permalink / raw)
  To: Brian Norris
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam, Xinming Hu,
	linux-wireless, stable

Hi Brian,

> >    netif_rx_ni+0xe8/0x120
> >    mwifiex_recv_packet+0xfc/0x10c [mwifiex]
> >    mwifiex_process_rx_packet+0x1d4/0x238 [mwifiex]
> >    mwifiex_11n_dispatch_pkt+0x190/0x1ac [mwifiex]
> >    mwifiex_11n_rx_reorder_pkt+0x28c/0x354 [mwifiex]
> 
> TL;DR: the problem was right here ^^^
> where you started running mwifiex_11n_dispatch_pkt() (via
> mwifiex_11n_scan_and_dispatch()) while holding a spinlock.
> 
> When you do that, you eventually call netif_rx_ni(), which specifically defers
> to softirq contexts. Then, if you happen to have your flush timer expiring just
> before that, you end up in mwifiex_flush_data(), which also needs that
> spinlock.

Understood; Thanks for this detail;

> 
> There are a few possible ways to handle this:
> (a) prevent processing softirqs in that context; e.g., with
>     local_bh_disable(). This seems somewhat of a hack.
>     (Side note: I think most of the locks in this driver really could be
>     spin_lock_bh(), not spin_lock_irqsave() -- we don't really care
>     about hardirq context for 99% of these locks.)
> (b) restructure so that packet processing (e.g., netif_rx_ni()) is done
>     outside of the spinlock.
> 
> It's actually not that hard to do (b). You can just queue your skb's up in a
> temporary sk_buff_head list and process them all at once after you've
> finished processing the reorder table. I have a local patch to do this, and I
> might send it your way if I can give it a bit more testing.


OK; that will be good. We will run a complete test after the patch (or we can work on this and share it for review).

Regards,
Ganapathi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [EXT] Re: [4.20 PATCH] Revert "mwifiex: restructure rx_reorder_tbl_lock usage"
  2019-06-04  3:03   ` [EXT] " Ganapathi Bhat
@ 2019-06-04 20:57     ` Brian Norris
  0 siblings, 0 replies; 8+ messages in thread
From: Brian Norris @ 2019-06-04 20:57 UTC (permalink / raw)
  To: Ganapathi Bhat
  Cc: linux-kernel, Amitkumar Karwar, Nishant Sarmukadam, Xinming Hu,
	linux-wireless, stable

Hi Ganapathi,

On Mon, Jun 3, 2019 at 8:04 PM Ganapathi Bhat <gbhat@marvell.com> wrote:
> > There are a few possible ways to handle this:
> > (a) prevent processing softirqs in that context; e.g., with
> >     local_bh_disable(). This seems somewhat of a hack.
> >     (Side note: I think most of the locks in this driver really could be
> >     spin_lock_bh(), not spin_lock_irqsave() -- we don't really care
> >     about hardirq context for 99% of these locks.)
> > (b) restructure so that packet processing (e.g., netif_rx_ni()) is done
> >     outside of the spinlock.
> >
> > It's actually not that hard to do (b). You can just queue your skb's up in a
> > temporary sk_buff_head list and process them all at once after you've
> > finished processing the reorder table. I have a local patch to do this, and I
> > might send it your way if I can give it a bit more testing.
>
> OK; that will be good. We will run a complete test after the patch (or we can work on this and share it for review).

I gave my work another round of testing and submitted it here:

https://patchwork.kernel.org/cover/10976089/
[PATCH 0/2] mwifiex: spinlock usage improvements

Feel free to give it a spin. It doesn't completely resolve everything
you were trying to fix, but I believe it helps nudge things in the
right direction.

Brian

^ permalink raw reply	[flat|nested] 8+ messages in thread

