linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
@ 2022-06-23  5:59 Duoming Zhou
  2022-06-23  9:46 ` Greg KH
  0 siblings, 1 reply; 7+ messages in thread
From: Duoming Zhou @ 2022-06-23  5:59 UTC (permalink / raw)
  To: linux-staging, gregkh
  Cc: davem, alexander.deucher, kuba, broonie, linux-kernel, Duoming Zhou

Sleep-in-atomic-context bugs occur when dm_fsync_timer_callback() runs.
The root cause is that memory allocation functions are called with
GFP_KERNEL or GFP_NOIO from dm_fsync_timer_callback(), which is a timer
handler. The call paths that could trigger the bugs are shown below:

    (interrupt context)
dm_fsync_timer_callback
  write_nic_byte
    kzalloc(sizeof(data), GFP_KERNEL); //may sleep
    usb_control_msg
      kmalloc(.., GFP_NOIO); //may sleep
  write_nic_dword
    kzalloc(sizeof(data), GFP_KERNEL); //may sleep
    usb_control_msg
      kmalloc(.., GFP_NOIO); //may sleep

This patch replaces the timer with delayed work and moves the operations
that may sleep into the delayed work handler in order to fix the bugs.

Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
---
Changes in v2:
  - Use delayed work to replace timer.

 drivers/staging/rtl8192u/r8192U.h    |  2 +-
 drivers/staging/rtl8192u/r8192U_dm.c | 38 +++++++++++++---------------
 drivers/staging/rtl8192u/r8192U_dm.h |  2 +-
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/rtl8192u/r8192U.h b/drivers/staging/rtl8192u/r8192U.h
index 14ca00a2789..1942cb84937 100644
--- a/drivers/staging/rtl8192u/r8192U.h
+++ b/drivers/staging/rtl8192u/r8192U.h
@@ -1013,7 +1013,7 @@ typedef struct r8192_priv {
 	bool		bis_any_nonbepkts;
 	bool		bcurrent_turbo_EDCA;
 	bool		bis_cur_rdlstate;
-	struct timer_list fsync_timer;
+	struct delayed_work fsync_work;
 	bool bfsync_processing;	/* 500ms Fsync timer is active or not */
 	u32	rate_record;
 	u32	rateCountDiffRecord;
diff --git a/drivers/staging/rtl8192u/r8192U_dm.c b/drivers/staging/rtl8192u/r8192U_dm.c
index 725bf5ca9e3..0fcfcaa6500 100644
--- a/drivers/staging/rtl8192u/r8192U_dm.c
+++ b/drivers/staging/rtl8192u/r8192U_dm.c
@@ -2578,19 +2578,20 @@ static void dm_init_fsync(struct net_device *dev)
 	priv->ieee80211->fsync_seconddiff_ratethreshold = 200;
 	priv->ieee80211->fsync_state = Default_Fsync;
 	priv->framesyncMonitor = 1;	/* current default 0xc38 monitor on */
-	timer_setup(&priv->fsync_timer, dm_fsync_timer_callback, 0);
+	INIT_DELAYED_WORK(&priv->fsync_work, dm_fsync_work_callback);
 }
 
 static void dm_deInit_fsync(struct net_device *dev)
 {
 	struct r8192_priv *priv = ieee80211_priv(dev);
 
-	del_timer_sync(&priv->fsync_timer);
+	cancel_delayed_work_sync(&priv->fsync_work);
 }
 
-void dm_fsync_timer_callback(struct timer_list *t)
+void dm_fsync_work_callback(struct work_struct *work)
 {
-	struct r8192_priv *priv = from_timer(priv, t, fsync_timer);
+	struct r8192_priv *priv =
+	    container_of(work, struct r8192_priv, fsync_work.work);
 	struct net_device *dev = priv->ieee80211->dev;
 	u32 rate_index, rate_count = 0, rate_count_diff = 0;
 	bool		bSwitchFromCountDiff = false;
@@ -2657,17 +2658,16 @@ void dm_fsync_timer_callback(struct timer_list *t)
 			}
 		}
 		if (bDoubleTimeInterval) {
-			if (timer_pending(&priv->fsync_timer))
-				del_timer_sync(&priv->fsync_timer);
-			priv->fsync_timer.expires = jiffies +
-				msecs_to_jiffies(priv->ieee80211->fsync_time_interval*priv->ieee80211->fsync_multiple_timeinterval);
-			add_timer(&priv->fsync_timer);
+			cancel_delayed_work_sync(&priv->fsync_work);
+			schedule_delayed_work(&priv->fsync_work,
+					      msecs_to_jiffies(priv
+					      ->ieee80211->fsync_time_interval *
+					      priv->ieee80211->fsync_multiple_timeinterval));
 		} else {
-			if (timer_pending(&priv->fsync_timer))
-				del_timer_sync(&priv->fsync_timer);
-			priv->fsync_timer.expires = jiffies +
-				msecs_to_jiffies(priv->ieee80211->fsync_time_interval);
-			add_timer(&priv->fsync_timer);
+			cancel_delayed_work_sync(&priv->fsync_work);
+			schedule_delayed_work(&priv->fsync_work,
+					      msecs_to_jiffies(priv
+					      ->ieee80211->fsync_time_interval));
 		}
 	} else {
 		/* Let Register return to default value; */
@@ -2695,7 +2695,7 @@ static void dm_EndSWFsync(struct net_device *dev)
 	struct r8192_priv *priv = ieee80211_priv(dev);
 
 	RT_TRACE(COMP_HALDM, "%s\n", __func__);
-	del_timer_sync(&(priv->fsync_timer));
+	cancel_delayed_work_sync(&priv->fsync_work);
 
 	/* Let Register return to default value; */
 	if (priv->bswitch_fsync) {
@@ -2736,11 +2736,9 @@ static void dm_StartSWFsync(struct net_device *dev)
 		if (priv->ieee80211->fsync_rate_bitmap &  rateBitmap)
 			priv->rate_record += priv->stats.received_rate_histogram[1][rateIndex];
 	}
-	if (timer_pending(&priv->fsync_timer))
-		del_timer_sync(&priv->fsync_timer);
-	priv->fsync_timer.expires = jiffies +
-			msecs_to_jiffies(priv->ieee80211->fsync_time_interval);
-	add_timer(&priv->fsync_timer);
+	cancel_delayed_work_sync(&priv->fsync_work);
+	schedule_delayed_work(&priv->fsync_work,
+			      msecs_to_jiffies(priv->ieee80211->fsync_time_interval));
 
 	write_nic_dword(dev, rOFDM0_RxDetector2, 0x465c12cd);
 }
diff --git a/drivers/staging/rtl8192u/r8192U_dm.h b/drivers/staging/rtl8192u/r8192U_dm.h
index 0b2a1c68859..2159018b4e3 100644
--- a/drivers/staging/rtl8192u/r8192U_dm.h
+++ b/drivers/staging/rtl8192u/r8192U_dm.h
@@ -166,7 +166,7 @@ void dm_force_tx_fw_info(struct net_device *dev,
 void dm_init_edca_turbo(struct net_device *dev);
 void dm_rf_operation_test_callback(unsigned long data);
 void dm_rf_pathcheck_workitemcallback(struct work_struct *work);
-void dm_fsync_timer_callback(struct timer_list *t);
+void dm_fsync_work_callback(struct work_struct *work);
 void dm_cck_txpower_adjust(struct net_device *dev, bool  binch14);
 void dm_shadow_init(struct net_device *dev);
 void dm_initialize_txpower_tracking(struct net_device *dev);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-06-23  5:59 [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback Duoming Zhou
@ 2022-06-23  9:46 ` Greg KH
  2022-06-24  6:02   ` duoming
  0 siblings, 1 reply; 7+ messages in thread
From: Greg KH @ 2022-06-23  9:46 UTC (permalink / raw)
  To: Duoming Zhou
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> There are sleep in atomic context bugs when dm_fsync_timer_callback is
> executing. The root cause is that the memory allocation functions with
> GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> which is a timer handler. The call paths that could trigger bugs are
> shown below:
> 
>     (interrupt context)
> dm_fsync_timer_callback
>   write_nic_byte
>     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
>     usb_control_msg
>       kmalloc(.., GFP_NOIO); //may sleep
>   write_nic_dword
>     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
>     usb_control_msg
>       kmalloc(.., GFP_NOIO); //may sleep
> 
> This patch uses delayed work to replace timer and moves the operations
> that may sleep into the delayed work in order to mitigate bugs.
> 
> Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> ---
> Changes in v2:
>   - Use delayed work to replace timer.

Did you test this with real hardware to verify it still works?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-06-23  9:46 ` Greg KH
@ 2022-06-24  6:02   ` duoming
  2022-06-25  9:21     ` duoming
  0 siblings, 1 reply; 7+ messages in thread
From: duoming @ 2022-06-24  6:02 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

Hello,

On Thu, 23 Jun 2022 11:46:41 +0200 Greg KH wrote:

> On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> > There are sleep in atomic context bugs when dm_fsync_timer_callback is
> > executing. The root cause is that the memory allocation functions with
> > GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> > which is a timer handler. The call paths that could trigger bugs are
> > shown below:
> > 
> >     (interrupt context)
> > dm_fsync_timer_callback
> >   write_nic_byte
> >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> >     usb_control_msg
> >       kmalloc(.., GFP_NOIO); //may sleep
> >   write_nic_dword
> >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> >     usb_control_msg
> >       kmalloc(.., GFP_NOIO); //may sleep
> > 
> > This patch uses delayed work to replace timer and moves the operations
> > that may sleep into the delayed work in order to mitigate bugs.
> > 
> > Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> > Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> > ---
> > Changes in v2:
> >   - Use delayed work to replace timer.
> 
> Did you test this with real hardware to verify it still works?

I am testing this and I will give you feedback within one or two days.

Best regards,
Duoming Zhou

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-06-24  6:02   ` duoming
@ 2022-06-25  9:21     ` duoming
  2022-06-25  9:30       ` Greg KH
  0 siblings, 1 reply; 7+ messages in thread
From: duoming @ 2022-06-25  9:21 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

Hello,

> > On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> > > There are sleep in atomic context bugs when dm_fsync_timer_callback is
> > > executing. The root cause is that the memory allocation functions with
> > > GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> > > which is a timer handler. The call paths that could trigger bugs are
> > > shown below:
> > > 
> > >     (interrupt context)
> > > dm_fsync_timer_callback
> > >   write_nic_byte
> > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > >     usb_control_msg
> > >       kmalloc(.., GFP_NOIO); //may sleep
> > >   write_nic_dword
> > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > >     usb_control_msg
> > >       kmalloc(.., GFP_NOIO); //may sleep
> > > 
> > > This patch uses delayed work to replace timer and moves the operations
> > > that may sleep into the delayed work in order to mitigate bugs.
> > > 
> > > Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> > > Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> > > ---
> > > Changes in v2:
> > >   - Use delayed work to replace timer.
> > 
> > Did you test this with real hardware to verify it still works?
> 
> I am testing this and I will give you feedback within one or two days.

Do you know which vendor IDs and device IDs are handled by the r8192u_usb
driver? I have tried many pieces of hardware, but none of them could
trigger this code.

Thank you!

Best regards,
Duoming Zhou

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-06-25  9:21     ` duoming
@ 2022-06-25  9:30       ` Greg KH
  2022-07-10  9:44         ` duoming
  0 siblings, 1 reply; 7+ messages in thread
From: Greg KH @ 2022-06-25  9:30 UTC (permalink / raw)
  To: duoming
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

On Sat, Jun 25, 2022 at 05:21:06PM +0800, duoming@zju.edu.cn wrote:
> Hello,
> 
> > > On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> > > > There are sleep in atomic context bugs when dm_fsync_timer_callback is
> > > > executing. The root cause is that the memory allocation functions with
> > > > GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> > > > which is a timer handler. The call paths that could trigger bugs are
> > > > shown below:
> > > > 
> > > >     (interrupt context)
> > > > dm_fsync_timer_callback
> > > >   write_nic_byte
> > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > >     usb_control_msg
> > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > >   write_nic_dword
> > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > >     usb_control_msg
> > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > > 
> > > > This patch uses delayed work to replace timer and moves the operations
> > > > that may sleep into the delayed work in order to mitigate bugs.
> > > > 
> > > > Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> > > > Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> > > > ---
> > > > Changes in v2:
> > > >   - Use delayed work to replace timer.
> > > 
> > > Did you test this with real hardware to verify it still works?
> > 
> > I am testing this and I will give you feedback within one or two days.
> 
> Do you know what vendor id and device id use the r8192u_usb driver?

The vendor/device ids are in the driver itself.  Also in the output of
modinfo:
	❯ modinfo drivers/staging/rtl8192u/r8192u_usb.ko | grep alias
	alias:          usb:v043Ep7A01d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v5A57p0290d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v2001p3301d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v1740p9201d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v0DF6p0031d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v050Dp805Ed*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v07AAp0043d*dc*dsc*dp*ic*isc*ip*in*
	alias:          usb:v0BDAp8709d*dc*dsc*dp*ic*isc*ip*in*

see the "v" and "p" portions of the alias string.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-06-25  9:30       ` Greg KH
@ 2022-07-10  9:44         ` duoming
  2022-07-10 10:02           ` Greg KH
  0 siblings, 1 reply; 7+ messages in thread
From: duoming @ 2022-07-10  9:44 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

Hello,

On Sat, 25 Jun 2022 11:30:13 +0200 Greg KH wrote:

> On Sat, Jun 25, 2022 at 05:21:06PM +0800, duoming@zju.edu.cn wrote:
> > Hello,
> > 
> > > > On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> > > > > There are sleep in atomic context bugs when dm_fsync_timer_callback is
> > > > > executing. The root cause is that the memory allocation functions with
> > > > > GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> > > > > which is a timer handler. The call paths that could trigger bugs are
> > > > > shown below:
> > > > > 
> > > > >     (interrupt context)
> > > > > dm_fsync_timer_callback
> > > > >   write_nic_byte
> > > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > > >     usb_control_msg
> > > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > > >   write_nic_dword
> > > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > > >     usb_control_msg
> > > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > > > 
> > > > > This patch uses delayed work to replace timer and moves the operations
> > > > > that may sleep into the delayed work in order to mitigate bugs.
> > > > > 
> > > > > Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> > > > > Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> > > > > ---
> > > > > Changes in v2:
> > > > >   - Use delayed work to replace timer.
> > > > 
> > > > Did you test this with real hardware to verify it still works?
> > > 
> > > I am testing this and I will give you feedback within one or two days.
> > 
> > Do you know what vendor id and device id use the r8192u_usb driver?
> 
> The vendor/device ids are in the driver itself.  Also in the output of
> modinfo:
> 	❯ modinfo drivers/staging/rtl8192u/r8192u_usb.ko | grep alias
> 	alias:          usb:v043Ep7A01d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v5A57p0290d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v2001p3301d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v1740p9201d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v0DF6p0031d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v050Dp805Ed*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v07AAp0043d*dc*dsc*dp*ic*isc*ip*in*
> 	alias:          usb:v0BDAp8709d*dc*dsc*dp*ic*isc*ip*in*
> 
> see the "v" and "p" portions of the alias string. 

I am sorry for the delay. This was really hard work, but I have successfully
tested it. The patch runs well.

The following is the report of the bug:

[  104.055321][    C3] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274
[  104.075499][    C3] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
[  104.076346][    C3] preempt_count: 100, expected: 0
[  104.076938][    C3] RCU nest depth: 0, expected: 0
[  104.076938][    C3] Preemption disabled at:
[  104.078194][    C3] [<ffffffff846000a0>] __do_softirq+0xa0/0x544
[  104.083981][    C3] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc5-00213-g7c895ef88403-dirty #65
[  104.083981][    C3] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[  104.083981][    C3] Call Trace:
[  104.083981][    C3]  <IRQ>
[  104.083981][    C3]  dump_stack_lvl+0xbf/0xee
[  104.083981][    C3]  __might_resched+0x386/0x4b0
[  104.083981][    C3]  ? irq_exit_rcu+0x4d/0xa0
[  104.083981][    C3]  ? __do_softirq+0xa0/0x544
[  104.083981][    C3]  ? write_nic_dword+0x70/0x160
[  104.083981][    C3]  kmem_cache_alloc_trace+0x3a/0x240
[  104.083981][    C3]  write_nic_dword+0x70/0x160
[  104.083981][    C3]  dm_fsync_timer_callback+0x1c4/0xd80
[  104.083981][    C3]  ? dm_rf_pathcheck_workitemcallback+0x1150/0x1150
[  104.083981][    C3]  call_timer_fn+0x2d/0x1c0
[  104.083981][    C3]  ? dm_rf_pathcheck_workitemcallback+0x1150/0x1150
[  104.083981][    C3]  expire_timers+0x1f3/0x320
[  104.083981][    C3]  __run_timers+0x3ff/0x4d0
[  104.083981][    C3]  ? lapic_next_event+0x61/0x70
[  104.083981][    C3]  run_timer_softirq+0x41/0x80
[  104.083981][    C3]  __do_softirq+0x233/0x544
[  104.083981][    C3]  irq_exit_rcu+0x41/0xa0
[  104.083981][    C3]  sysvec_apic_timer_interrupt+0x8c/0xb0
[  104.083981][    C3]  </IRQ>
[  104.083981][    C3]  <TASK>
[  104.083981][    C3]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  104.083981][    C3] RIP: 0010:rtl8192_usb_initendpoints+0xa0/0x310
[  104.083981][    C3] Code: 85 ed 0f 84 3c 02 00 00 48 89 1c 24 31 ff be c0 0c 00 00 e8 a2 19 b8 ff 48 89 c5 43 80 3c 37 00 74 08 4c 89 ef e8 80 8
[  104.083981][    C3] RSP: 0018:ffff888006e279f8 EFLAGS: 00000246
[  104.083981][    C3] RAX: ffff8880097b0300 RBX: ffff88800bf00000 RCX: ffffffff831e1751
[  104.083981][    C3] RDX: 0000000000000001 RSI: 0000000000000004 RDI: ffff8880097b0320
[  104.083981][    C3] RBP: ffff8880097b0300 R08: dffffc0000000000 R09: ffffed10012f6061
[  104.083981][    C3] R10: dfffe910012f6062 R11: 1ffff110012f6060 R12: ffff88800bf0fff4
[  104.083981][    C3] R13: ffff88800bf09140 R14: dffffc0000000000 R15: 1ffff110017e1228

After replacing the timer with delayed work, the following backtrace was
captured with gdb. As we can see, dm_fsync_work_callback() is successfully
triggered.

[#0] 0xffffffff8368bd6b → dm_fsync_work_callback(work=0xffff88800bda98b8)
[#1] 0xffffffff8118a583 → process_one_work(worker=0xffff888006f5b200, work=0xffff88800bda98b8)
[#2] 0xffffffff8118b526 → worker_thread(__worker=0xffff88800bda98b8)
[#3] 0xffffffff81199d72 → kthread(_create=<optimized out>)
[#4] 0xffffffff81003902 → ret_from_fork()

dm_fsync_work_callback() runs well and the bug is no longer reported.

Best regards,
Duoming Zhou

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
  2022-07-10  9:44         ` duoming
@ 2022-07-10 10:02           ` Greg KH
  0 siblings, 0 replies; 7+ messages in thread
From: Greg KH @ 2022-07-10 10:02 UTC (permalink / raw)
  To: duoming
  Cc: linux-staging, davem, alexander.deucher, kuba, broonie, linux-kernel

On Sun, Jul 10, 2022 at 05:44:16PM +0800, duoming@zju.edu.cn wrote:
> Hello,
> 
> On Sat, 25 Jun 2022 11:30:13 +0200 Greg KH wrote:
> 
> > On Sat, Jun 25, 2022 at 05:21:06PM +0800, duoming@zju.edu.cn wrote:
> > > Hello,
> > > 
> > > > > On Thu, Jun 23, 2022 at 01:59:12PM +0800, Duoming Zhou wrote:
> > > > > > There are sleep in atomic context bugs when dm_fsync_timer_callback is
> > > > > > executing. The root cause is that the memory allocation functions with
> > > > > > GFP_KERNEL or GFP_NOIO parameters are called in dm_fsync_timer_callback
> > > > > > which is a timer handler. The call paths that could trigger bugs are
> > > > > > shown below:
> > > > > > 
> > > > > >     (interrupt context)
> > > > > > dm_fsync_timer_callback
> > > > > >   write_nic_byte
> > > > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > > > >     usb_control_msg
> > > > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > > > >   write_nic_dword
> > > > > >     kzalloc(sizeof(data), GFP_KERNEL); //may sleep
> > > > > >     usb_control_msg
> > > > > >       kmalloc(.., GFP_NOIO); //may sleep
> > > > > > 
> > > > > > This patch uses delayed work to replace timer and moves the operations
> > > > > > that may sleep into the delayed work in order to mitigate bugs.
> > > > > > 
> > > > > > Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
> > > > > > Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> > > > > > ---
> > > > > > Changes in v2:
> > > > > >   - Use delayed work to replace timer.
> > > > > 
> > > > > Did you test this with real hardware to verify it still works?
> > > > 
> > > > I am testing this and I will give you feedback within one or two days.
> > > 
> > > Do you know what vendor id and device id use the r8192u_usb driver?
> > 
> > The vendor/device ids are in the driver itself.  Also in the output of
> > modinfo:
> > 	❯ modinfo drivers/staging/rtl8192u/r8192u_usb.ko | grep alias
> > 	alias:          usb:v043Ep7A01d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v5A57p0290d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v2001p3301d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v1740p9201d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v0DF6p0031d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v050Dp805Ed*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v07AAp0043d*dc*dsc*dp*ic*isc*ip*in*
> > 	alias:          usb:v0BDAp8709d*dc*dsc*dp*ic*isc*ip*in*
> > 
> > see the "v" and "p" portions of the alias string. 
> 
> I am sorry for the delay. This is a really hard work, but I successfully test it.
> This patch could run well.
> 
> The following is the report of the bug:
> 
> [  104.055321][    C3] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274
> [  104.075499][    C3] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
> [  104.076346][    C3] preempt_count: 100, expected: 0
> [  104.076938][    C3] RCU nest depth: 0, expected: 0
> [  104.076938][    C3] Preemption disabled at:
> [  104.078194][    C3] [<ffffffff846000a0>] __do_softirq+0xa0/0x544
> [  104.083981][    C3] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc5-00213-g7c895ef88403-dirty #65
> [  104.083981][    C3] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
> [  104.083981][    C3] Call Trace:
> [  104.083981][    C3]  <IRQ>
> [  104.083981][    C3]  dump_stack_lvl+0xbf/0xee
> [  104.083981][    C3]  __might_resched+0x386/0x4b0
> [  104.083981][    C3]  ? irq_exit_rcu+0x4d/0xa0
> [  104.083981][    C3]  ? __do_softirq+0xa0/0x544
> [  104.083981][    C3]  ? write_nic_dword+0x70/0x160
> [  104.083981][    C3]  kmem_cache_alloc_trace+0x3a/0x240
> [  104.083981][    C3]  write_nic_dword+0x70/0x160
> [  104.083981][    C3]  dm_fsync_timer_callback+0x1c4/0xd80
> [  104.083981][    C3]  ? dm_rf_pathcheck_workitemcallback+0x1150/0x1150
> [  104.083981][    C3]  call_timer_fn+0x2d/0x1c0
> [  104.083981][    C3]  ? dm_rf_pathcheck_workitemcallback+0x1150/0x1150
> [  104.083981][    C3]  expire_timers+0x1f3/0x320
> [  104.083981][    C3]  __run_timers+0x3ff/0x4d0
> [  104.083981][    C3]  ? lapic_next_event+0x61/0x70
> [  104.083981][    C3]  run_timer_softirq+0x41/0x80
> [  104.083981][    C3]  __do_softirq+0x233/0x544
> [  104.083981][    C3]  irq_exit_rcu+0x41/0xa0
> [  104.083981][    C3]  sysvec_apic_timer_interrupt+0x8c/0xb0
> [  104.083981][    C3]  </IRQ>
> [  104.083981][    C3]  <TASK>
> [  104.083981][    C3]  asm_sysvec_apic_timer_interrupt+0x1b/0x20
> [  104.083981][    C3] RIP: 0010:rtl8192_usb_initendpoints+0xa0/0x310
> [  104.083981][    C3] Code: 85 ed 0f 84 3c 02 00 00 48 89 1c 24 31 ff be c0 0c 00 00 e8 a2 19 b8 ff 48 89 c5 43 80 3c 37 00 74 08 4c 89 ef e8 80 8
> [  104.083981][    C3] RSP: 0018:ffff888006e279f8 EFLAGS: 00000246
> [  104.083981][    C3] RAX: ffff8880097b0300 RBX: ffff88800bf00000 RCX: ffffffff831e1751
> [  104.083981][    C3] RDX: 0000000000000001 RSI: 0000000000000004 RDI: ffff8880097b0320
> [  104.083981][    C3] RBP: ffff8880097b0300 R08: dffffc0000000000 R09: ffffed10012f6061
> [  104.083981][    C3] R10: dfffe910012f6062 R11: 1ffff110012f6060 R12: ffff88800bf0fff4
> [  104.083981][    C3] R13: ffff88800bf09140 R14: dffffc0000000000 R15: 1ffff110017e1228
> 
> After using delayed work to replace timer, the following is the result triggered by gdb. 
> As we can see, the dm_fsync_work_callback() is successfully triggered.
> 
> [#0] 0xffffffff8368bd6b → dm_fsync_work_callback(work=0xffff88800bda98b8)
> [#1] 0xffffffff8118a583 → process_one_work(worker=0xffff888006f5b200, work=0xffff88800bda98b8)
> [#2] 0xffffffff8118b526 → worker_thread(__worker=0xffff88800bda98b8)
> [#3] 0xffffffff81199d72 → kthread(_create=<optimized out>)
> [#4] 0xffffffff81003902 → ret_from_fork()
> 
> The dm_fsync_work_callback() could run well and there is no bug report anymore.

Great, can you resend this, it's not in my queue anymore.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-07-10 10:02 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-23  5:59 [PATCH v2] staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback Duoming Zhou
2022-06-23  9:46 ` Greg KH
2022-06-24  6:02   ` duoming
2022-06-25  9:21     ` duoming
2022-06-25  9:30       ` Greg KH
2022-07-10  9:44         ` duoming
2022-07-10 10:02           ` Greg KH

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).