All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] How to fix an async scan - rmmod race?
@ 2012-04-05 13:58 Tomas Henzl
  2012-04-05 15:57 ` Mike Christie
                   ` (3 more replies)
  0 siblings, 4 replies; 38+ messages in thread
From: Tomas Henzl @ 2012-04-05 13:58 UTC (permalink / raw)
  To: 'linux-scsi@vger.kernel.org'; +Cc: Stanislaw Gruszka

When a rmmod is tried then in some cases the kernel is not able to handle a paging request:
[  727.154296] BUG: unable to handle kernel paging request at ffffffffa01874b8
[  727.161473] IP: [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
[  727.167637] PGD 1c07067 PUD 1c0b063 PMD 178d9ed067 PTE 0
[  727.173369] Oops: 0000 [#1] SMP
[  727.176808] CPU 0
[  727.178691] Modules linked in: ses enclosure mlx4_ib mlx4_en microcode serio_raw joydev i2c_i801 iTCO_wdt i2c_core iTCO_vendor_support ioatdma mlx4_core scsi_transport_sas igb raid_class i7core_edac dca edac_core ib_ipoib ib_cm ib_addr ib_sa ib_uverbs ib_umad ib_mad ib_core ipmi_poweroff ipmi_watchdog ipmi_devintf ipmi_si ipmi_msghandler [last unloaded: mpt2sas]
[  727.213474]
[  727.215065] Pid: 2439, comm: scsi_scan_6 Tainted: G        W    3.2.3-2.fc16.x86_64.debug #1 Supermicro X8DTH-i/6/iF/6F/X8DTH
[  727.226690] RIP: 0010:[<ffffffff8141d1e1>]  [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
[  727.235334] RSP: 0018:ffff88178b2abe40  EFLAGS: 00010246
[  727.240736] RAX: ffffffffa0187420 RBX: ffff881775f74290 RCX: 0000000000000001
[  727.247962] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000282
[  727.255188] RBP: ffff88178b2abe50 R08: 0000000000000000 R09: 0000000000000001
[  727.262413] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000100067e40
[  727.269638] R13: ffffffff8141d220 R14: ffff88178c3d28f8 R15: 0000000000000000
[  727.276857] FS:  0000000000000000(0000) GS:ffff8817de600000(0000) knlGS:0000000000000000
[  727.285094] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  727.290936] CR2: ffffffffa01874b8 CR3: 0000000001c05000 CR4: 00000000000006f0
[  727.298163] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  727.305388] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  727.312607] Process scsi_scan_6 (pid: 2439, threadinfo ffff88178b2aa000, task ffff88178a552450)
[  727.321439] Stack:
[  727.323548]  ffff881775f74290 ffff88178c3d28f8 ffff88178b2abe90 ffffffff8141d245
[  727.331336]  ffff88178c3d28f8 ffff8817745cbcf8 ffff88178c3d28f8 ffffffff8141d220
[  727.339131]  0000000000000000 0000000000000000 ffff88178b2abf40 ffffffff810a33e0
[  727.346928] Call Trace:
[  727.349472]  [<ffffffff8141d245>] do_scan_async+0x25/0x160
[  727.355049]  [<ffffffff8141d220>] ? do_scsi_scan_host+0xa0/0xa0
[  727.362260]  [<ffffffff810a33e0>] kthread+0xb0/0xc0
[  727.367228]  [<ffffffff8167ec04>] kernel_thread_helper+0x4/0x10
[  727.373244]  [<ffffffff816744b4>] ? retint_restore_args+0x13/0x13
[  727.379432]  [<ffffffff810a3330>] ? __init_kthread_worker+0x70/0x70
[  727.385792]  [<ffffffff8167ec00>] ? gs_change+0x13/0x13
[  727.391105] Code: d2 48 8b 83 00 02 00 00 48 8b 80 98 00 00 00 eb 21 66 0f 1f 84 00 00 00 00 00 bf 0a 00 00 00 e8 a6 1b c7 ff 48 8b 83 00 02 00 00 <48> 8b 80 98 00 00 00 48 8b 35 11 4e 8d 00 48 89 df 4c 29 e6 ff
[  727.414105] RIP  [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
[  727.420355]  RSP <ffff88178b2abe40>
[  727.423933] CR2: ffffffffa01874b8
[  727.427340] ---[ end trace 87ef4ae7ab723385 ]---
>From what I observerved it happens when when we call the rmmod only a while after a modprobe
(in this case it is the mpt2sas driver). More accurately said, it happens when rmmod is called
while scsi async is still at work. The driver is removed but the scsi_host_template is still filled
with now invalid pointers, in this case it is most likely the hostt->scan_finished which causes the BUG.

A logical step is to wait in scsi_remove_host until the async scan ends.
@@ -172,6 +173,9 @@ void scsi_remove_host(struct Scsi_Host *shost)
 	scsi_forget_host(shost);
 	mutex_unlock(&shost->scan_mutex);
 	scsi_proc_host_rm(shost);
+	
+	while (shost->async_scan)
+		msleep(10);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (scsi_host_set_state(shost, SHOST_DEL))

And a change in scsi_finish_async_scan is needed too - moving the 'async_scan = 0;' so it is
after other functions which could touch a shost.
@@ -1800,9 +1795,6 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
 
 	scsi_sysfs_add_devices(shost);
 
-	spin_lock_irqsave(shost->host_lock, flags);
-	shost->async_scan = 0;
-	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	mutex_unlock(&shost->scan_mutex);
 
@@ -1816,7 +1808,11 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
 	spin_unlock(&async_scan_lock);
 
 	scsi_autopm_put_host(shost);
-	scsi_host_put(shost);
+	
+	spin_lock_irqsave(shost->host_lock, flags);
+	shost->async_scan = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+	
 	kfree(data);
 }

When the async scan is protected this way a further protection via ref. count is no more needed
so remove it
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 29c4c04..be9e6fe 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1743,10 +1743,9 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
-		goto err;
-	data->shost = scsi_host_get(shost);
-	if (!data->shost)
-		goto err;
+		return NULL;
+
+	data->shost = shost;
 	init_completion(&data->prev_finished);
 
 	mutex_lock(&shost->scan_mutex);

And tune the do_scsi_scan_host so it ends faster in case the host is being removed
@@ -1827,8 +1823,14 @@ static void do_scsi_scan_host(struct Scsi_Host *shost)
 		if (shost->hostt->scan_start)
 			shost->hostt->scan_start(shost);
 
-		while (!shost->hostt->scan_finished(shost, jiffies - start))
+		while (!shost->hostt->scan_finished(shost, jiffies - start)) {
 			msleep(10);
+			if (!scsi_host_scan_allowed(shost)) {
+				printk (KERN_INFO "scsi: host not in proper a "
+					"state, async scan aborted ...\n");
+				break;
+			}
+		}
 	} else {
 		scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD,
 				SCAN_WILD_CARD, 0);
------------------
Is the above solution good enough? I can imagine a ref counting object created after
a new device is attached would solve this better, if it already exists please advice.
Tomas


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 13:58 [RFC] How to fix an async scan - rmmod race? Tomas Henzl
@ 2012-04-05 15:57 ` Mike Christie
  2012-04-05 16:05   ` Mike Christie
  2012-04-05 18:00 ` Bart Van Assche
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 38+ messages in thread
From: Mike Christie @ 2012-04-05 15:57 UTC (permalink / raw)
  To: Tomas Henzl; +Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/05/2012 08:58 AM, Tomas Henzl wrote:
> When a rmmod is tried then in some cases the kernel is not able to handle a paging request:
> [  727.154296] BUG: unable to handle kernel paging request at ffffffffa01874b8
> [  727.161473] IP: [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
> [  727.167637] PGD 1c07067 PUD 1c0b063 PMD 178d9ed067 PTE 0
> [  727.173369] Oops: 0000 [#1] SMP
> [  727.176808] CPU 0
> [  727.178691] Modules linked in: ses enclosure mlx4_ib mlx4_en microcode serio_raw joydev i2c_i801 iTCO_wdt i2c_core iTCO_vendor_support ioatdma mlx4_core scsi_transport_sas igb raid_class i7core_edac dca edac_core ib_ipoib ib_cm ib_addr ib_sa ib_uverbs ib_umad ib_mad ib_core ipmi_poweroff ipmi_watchdog ipmi_devintf ipmi_si ipmi_msghandler [last unloaded: mpt2sas]
> [  727.213474]
> [  727.215065] Pid: 2439, comm: scsi_scan_6 Tainted: G        W    3.2.3-2.fc16.x86_64.debug #1 Supermicro X8DTH-i/6/iF/6F/X8DTH
> [  727.226690] RIP: 0010:[<ffffffff8141d1e1>]  [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
> [  727.235334] RSP: 0018:ffff88178b2abe40  EFLAGS: 00010246
> [  727.240736] RAX: ffffffffa0187420 RBX: ffff881775f74290 RCX: 0000000000000001
> [  727.247962] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000282
> [  727.255188] RBP: ffff88178b2abe50 R08: 0000000000000000 R09: 0000000000000001
> [  727.262413] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000100067e40
> [  727.269638] R13: ffffffff8141d220 R14: ffff88178c3d28f8 R15: 0000000000000000
> [  727.276857] FS:  0000000000000000(0000) GS:ffff8817de600000(0000) knlGS:0000000000000000
> [  727.285094] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [  727.290936] CR2: ffffffffa01874b8 CR3: 0000000001c05000 CR4: 00000000000006f0
> [  727.298163] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [  727.305388] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [  727.312607] Process scsi_scan_6 (pid: 2439, threadinfo ffff88178b2aa000, task ffff88178a552450)
> [  727.321439] Stack:
> [  727.323548]  ffff881775f74290 ffff88178c3d28f8 ffff88178b2abe90 ffffffff8141d245
> [  727.331336]  ffff88178c3d28f8 ffff8817745cbcf8 ffff88178c3d28f8 ffffffff8141d220
> [  727.339131]  0000000000000000 0000000000000000 ffff88178b2abf40 ffffffff810a33e0
> [  727.346928] Call Trace:
> [  727.349472]  [<ffffffff8141d245>] do_scan_async+0x25/0x160
> [  727.355049]  [<ffffffff8141d220>] ? do_scsi_scan_host+0xa0/0xa0
> [  727.362260]  [<ffffffff810a33e0>] kthread+0xb0/0xc0
> [  727.367228]  [<ffffffff8167ec04>] kernel_thread_helper+0x4/0x10
> [  727.373244]  [<ffffffff816744b4>] ? retint_restore_args+0x13/0x13
> [  727.379432]  [<ffffffff810a3330>] ? __init_kthread_worker+0x70/0x70
> [  727.385792]  [<ffffffff8167ec00>] ? gs_change+0x13/0x13
> [  727.391105] Code: d2 48 8b 83 00 02 00 00 48 8b 80 98 00 00 00 eb 21 66 0f 1f 84 00 00 00 00 00 bf 0a 00 00 00 e8 a6 1b c7 ff 48 8b 83 00 02 00 00 <48> 8b 80 98 00 00 00 48 8b 35 11 4e 8d 00 48 89 df 4c 29 e6 ff
> [  727.414105] RIP  [<ffffffff8141d1e1>] do_scsi_scan_host+0x61/0xa0
> [  727.420355]  RSP <ffff88178b2abe40>
> [  727.423933] CR2: ffffffffa01874b8
> [  727.427340] ---[ end trace 87ef4ae7ab723385 ]---
> From what I observerved it happens when when we call the rmmod only a while after a modprobe
> (in this case it is the mpt2sas driver). More accurately said, it happens when rmmod is called
> while scsi async is still at work. The driver is removed but the scsi_host_template is still filled
> with now invalid pointers, in this case it is most likely the hostt->scan_finished which causes the BUG.
> 
> A logical step is to wait in scsi_remove_host until the async scan ends.
> @@ -172,6 +173,9 @@ void scsi_remove_host(struct Scsi_Host *shost)
>  	scsi_forget_host(shost);
>  	mutex_unlock(&shost->scan_mutex);
>  	scsi_proc_host_rm(shost);
> +	
> +	while (shost->async_scan)
> +		msleep(10);
>  
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	if (scsi_host_set_state(shost, SHOST_DEL))
> 
> And a change in scsi_finish_async_scan is needed too - moving the 'async_scan = 0;' so it is
> after other functions which could touch a shost.
> @@ -1800,9 +1795,6 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
>  
>  	scsi_sysfs_add_devices(shost);
>  
> -	spin_lock_irqsave(shost->host_lock, flags);
> -	shost->async_scan = 0;
> -	spin_unlock_irqrestore(shost->host_lock, flags);
>  
>  	mutex_unlock(&shost->scan_mutex);
>  
> @@ -1816,7 +1808,11 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
>  	spin_unlock(&async_scan_lock);
>  
>  	scsi_autopm_put_host(shost);
> -	scsi_host_put(shost);
> +	
> +	spin_lock_irqsave(shost->host_lock, flags);
> +	shost->async_scan = 0;
> +	spin_unlock_irqrestore(shost->host_lock, flags);
> +	
>  	kfree(data);
>  }
> 
> When the async scan is protected this way a further protection via ref. count is no more needed
> so remove it
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 29c4c04..be9e6fe 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1743,10 +1743,9 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  
>  	data = kmalloc(sizeof(*data), GFP_KERNEL);
>  	if (!data)
> -		goto err;
> -	data->shost = scsi_host_get(shost);
> -	if (!data->shost)
> -		goto err;
> +		return NULL;
> +
> +	data->shost = shost;
>  	init_completion(&data->prev_finished);
>  
>  	mutex_lock(&shost->scan_mutex);

Do you need to remove the matching scsi_host_put in
scsi_finish_async_scan too?

> 
> And tune the do_scsi_scan_host so it ends faster in case the host is being removed
> @@ -1827,8 +1823,14 @@ static void do_scsi_scan_host(struct Scsi_Host *shost)
>  		if (shost->hostt->scan_start)
>  			shost->hostt->scan_start(shost);
>  
> -		while (!shost->hostt->scan_finished(shost, jiffies - start))
> +		while (!shost->hostt->scan_finished(shost, jiffies - start)) {
>  			msleep(10);
> +			if (!scsi_host_scan_allowed(shost)) {
> +				printk (KERN_INFO "scsi: host not in proper a "
> +					"state, async scan aborted ...\n");
> +				break;
> +			}
> +		}
>  	} else {
>  		scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD,
>  				SCAN_WILD_CARD, 0);
> ------------------
> Is the above solution good enough?

I think it will sort of work. It is like doing a complete/wait or flush
on the thread. scsi_scan_target users do something similar where they
will queue the scan work on the shost->work_q then wait on the flush in
functions like fc_remove_host.

I guess you could also just convert the code to use workqueues (do a
workqueue_struct for scsi_scan.c and a per host workqueue) then call
flush_work in scsi_remove_host.

The only problem I could think of is if the scan_finished times out,
then do_scsi_scan_host will return and do_scan_async will call
scsi_finish_async_scan and scsi_remove_host will continue to run.
However, if scsi_scan.c was still running something like the sequential
scan/report luns scanning code or if the scsi eh was running and that is
what caused the scan_finished to timeout then that could be accessing
the scsi_host_template and other structs.

I did not look at all the scan_finished callouts to see what they do.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 15:57 ` Mike Christie
@ 2012-04-05 16:05   ` Mike Christie
  0 siblings, 0 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-05 16:05 UTC (permalink / raw)
  To: Tomas Henzl; +Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/05/2012 10:57 AM, Mike Christie wrote:
> The only problem I could think of is if the scan_finished times out,
> then do_scsi_scan_host will return and do_scan_async will call
> scsi_finish_async_scan and scsi_remove_host will continue to run.
> However, if scsi_scan.c was still running something like the sequential
> scan/report luns scanning code or if the scsi eh was running and that is

Ignore the eh comment. The ehhandler is stopped in scsi_host_dev_release.

> what caused the scan_finished to timeout then that could be accessing
> the scsi_host_template and other structs.
> 
> I did not look at all the scan_finished callouts to see what they do.

I looked at the iscsi and fc classes and they should be ok.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 13:58 [RFC] How to fix an async scan - rmmod race? Tomas Henzl
  2012-04-05 15:57 ` Mike Christie
@ 2012-04-05 18:00 ` Bart Van Assche
  2012-04-05 21:29   ` Mike Christie
  2012-04-06  9:39 ` Bart Van Assche
  2012-04-12 12:48 ` [RFC] How to fix an async scan - rmmod race? try_module_get Tomas Henzl
  3 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-05 18:00 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, michaelc

On 04/05/12 13:58, Tomas Henzl wrote:

> When a rmmod is tried then in some cases the kernel is not able to handle a paging request:
> [  727.154296] BUG: unable to handle kernel paging request at ffffffffa01874b8
> From what I observerved it happens when when we call the rmmod only a while after a modprobe
> (in this case it is the mpt2sas driver). More accurately said, it happens when rmmod is called
> while scsi async is still at work. The driver is removed but the scsi_host_template is still filled
> with now invalid pointers, in this case it is most likely the hostt->scan_finished which causes the BUG.


Are you sure the above analysis is correct ? I've triggered several
million device removal events with ib_srp but I haven't ever seen the
above crash. Maybe your patch for the SCSI core is hiding a race in the
mpt2sas device removal code.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 18:00 ` Bart Van Assche
@ 2012-04-05 21:29   ` Mike Christie
  2012-04-06  9:24     ` Bart Van Assche
  2012-04-06  9:54     ` Tomas Henzl
  0 siblings, 2 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-05 21:29 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/05/2012 01:00 PM, Bart Van Assche wrote:
> On 04/05/12 13:58, Tomas Henzl wrote:
> 
>> When a rmmod is tried then in some cases the kernel is not able to handle a paging request:
>> [  727.154296] BUG: unable to handle kernel paging request at ffffffffa01874b8
>> From what I observerved it happens when when we call the rmmod only a while after a modprobe
>> (in this case it is the mpt2sas driver). More accurately said, it happens when rmmod is called
>> while scsi async is still at work. The driver is removed but the scsi_host_template is still filled
>> with now invalid pointers, in this case it is most likely the hostt->scan_finished which causes the BUG.
> 
> 
> Are you sure the above analysis is correct ? I've triggered several
> million device removal events with ib_srp but I haven't ever seen the
> above crash.

ib_srp uses scsi_scan_target when the target is added so you are going
down a different code path. If you are rmmoding ib_srp while the driver
is calling scsi_scan_target() that would be a similar problem.

Tomas's bug occurs when drivers use scsi_scan_host, use the async scsi
device scanning, and then rmmod the LLD while the scan is still in progress.

I think a general problem that we might hit similar to Tomas's issue is
when scanning from userspace then rmmoding the driver. Maybe that means
we need a more generic fix? Or, maybe that could be handled by having
scsi_scan() do a try_module_get before scanning.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 21:29   ` Mike Christie
@ 2012-04-06  9:24     ` Bart Van Assche
  2012-04-06 17:22       ` Mike Christie
  2012-04-06  9:54     ` Tomas Henzl
  1 sibling, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06  9:24 UTC (permalink / raw)
  To: Mike Christie
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/05/12 21:29, Mike Christie wrote:

> Tomas's bug occurs when drivers use scsi_scan_host, use the async scsi
> device scanning, and then rmmod the LLD while the scan is still in progress.
> 
> I think a general problem that we might hit similar to Tomas's issue is
> when scanning from userspace then rmmoding the driver. Maybe that means
> we need a more generic fix? Or, maybe that could be handled by having
> scsi_scan() do a try_module_get before scanning.

That suggestion certainly makes sense to me. But Tomas' approach has the
advantage that it guarantees that scanning will have finished once
scsi_remove_hosts() returned and hence has less potential for triggering
race conditions than an approach based on try_module_get().

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 13:58 [RFC] How to fix an async scan - rmmod race? Tomas Henzl
  2012-04-05 15:57 ` Mike Christie
  2012-04-05 18:00 ` Bart Van Assche
@ 2012-04-06  9:39 ` Bart Van Assche
  2012-04-06 10:14   ` Tomas Henzl
  2012-04-12 12:48 ` [RFC] How to fix an async scan - rmmod race? try_module_get Tomas Henzl
  3 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06  9:39 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, Mike Christie

On 04/05/12 13:58, Tomas Henzl wrote:

> When the async scan is protected this way a further protection via ref. count is no more needed
> so remove it
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 29c4c04..be9e6fe 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1743,10 +1743,9 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>  
>  	data = kmalloc(sizeof(*data), GFP_KERNEL);
>  	if (!data)
> -		goto err;
> -	data->shost = scsi_host_get(shost);
> -	if (!data->shost)
> -		goto err;
> +		return NULL;
> +
> +	data->shost = shost;
>  	init_completion(&data->prev_finished);
>  
>  	mutex_lock(&shost->scan_mutex);


Maybe it's better to leave the scsi_host_get() / scsi_host_put() pair in
scsi_prep_async_scan() and scsi_finish_async_scan(). Let's have a look
at the following code in scsi_finish_async_scan():

	spin_lock_irqsave(shost->host_lock, flags);
	shost->async_scan = 0;
	spin_unlock_irqrestore(shost->host_lock, flags);

If the context that is waiting for shost->async_scan wouldn't lock
shost->host_lock after having observed that shost->async_scan became
zero then shost->host_lock could already have been freed before
scsi_finish_async_scan() has unlocked that spin lock. Personally I
prefer to avoid having such subtle dependencies between the async
scanning code and the context waiting for it to finish.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-05 21:29   ` Mike Christie
  2012-04-06  9:24     ` Bart Van Assche
@ 2012-04-06  9:54     ` Tomas Henzl
  2012-04-06 15:20       ` James Bottomley
  1 sibling, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-04-06  9:54 UTC (permalink / raw)
  To: Mike Christie
  Cc: Bart Van Assche, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/05/2012 11:29 PM, Mike Christie wrote:
> On 04/05/2012 01:00 PM, Bart Van Assche wrote:
>> On 04/05/12 13:58, Tomas Henzl wrote:
>>
>>> When a rmmod is tried then in some cases the kernel is not able to handle a paging request:
>>> [  727.154296] BUG: unable to handle kernel paging request at ffffffffa01874b8
>>> From what I observerved it happens when when we call the rmmod only a while after a modprobe
>>> (in this case it is the mpt2sas driver). More accurately said, it happens when rmmod is called
>>> while scsi async is still at work. The driver is removed but the scsi_host_template is still filled
>>> with now invalid pointers, in this case it is most likely the hostt->scan_finished which causes the BUG.
>>
>> Are you sure the above analysis is correct ? I've triggered several
>> million device removal events with ib_srp but I haven't ever seen the
>> above crash.
> ib_srp uses scsi_scan_target when the target is added so you are going
> down a different code path. If you are rmmoding ib_srp while the driver
> is calling scsi_scan_target() that would be a similar problem.
If the driver doesn't define the 'scsi_host_template.scan_finished' then the problem
is less visible. It's because in do_scsi_scan_host another path is taken and  
the scsi_host_scan_allowed test is used to skip further scanning, which I think reduces
the risk significantly. ib_srp doesn't define scan_finished so it is not safe but it is less
likely it will hit this.

>
> Tomas's bug occurs when drivers use scsi_scan_host, use the async scsi
> device scanning, and then rmmod the LLD while the scan is still in progress.
>
> I think a general problem that we might hit similar to Tomas's issue is
> when scanning from userspace then rmmoding the driver. Maybe that means
> we need a more generic fix? Or, maybe that could be handled by having
> scsi_scan() do a try_module_get before scanning.
I like this idea(try_module_get) it is easy/elegant and it is used in scsi_rescan_device,
but a scan can take a lot of time and during that time the driver couldn't be removed.
When a flag in scsi_remove_host is set then the scan can be cancelled, if the user 
rmmods the driver.

> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06  9:39 ` Bart Van Assche
@ 2012-04-06 10:14   ` Tomas Henzl
  2012-04-06 13:13     ` Tomas Henzl
  0 siblings, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-04-06 10:14 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, Mike Christie

On 04/06/2012 11:39 AM, Bart Van Assche wrote:
> On 04/05/12 13:58, Tomas Henzl wrote:
>
>> When the async scan is protected this way a further protection via ref. count is no more needed
>> so remove it
>> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
>> index 29c4c04..be9e6fe 100644
>> --- a/drivers/scsi/scsi_scan.c
>> +++ b/drivers/scsi/scsi_scan.c
>> @@ -1743,10 +1743,9 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>>  
>>  	data = kmalloc(sizeof(*data), GFP_KERNEL);
>>  	if (!data)
>> -		goto err;
>> -	data->shost = scsi_host_get(shost);
>> -	if (!data->shost)
>> -		goto err;
>> +		return NULL;
>> +
>> +	data->shost = shost;
>>  	init_completion(&data->prev_finished);
>>  
>>  	mutex_lock(&shost->scan_mutex);
>
> Maybe it's better to leave the scsi_host_get() / scsi_host_put() pair in
> scsi_prep_async_scan() and scsi_finish_async_scan(). Let's have a look
> at the following code in scsi_finish_async_scan():
>
> 	spin_lock_irqsave(shost->host_lock, flags);
> 	shost->async_scan = 0;
> 	spin_unlock_irqrestore(shost->host_lock, flags);
>
> If the context that is waiting for shost->async_scan wouldn't lock
> shost->host_lock after having observed that shost->async_scan became
> zero then shost->host_lock could already have been freed before
> scsi_finish_async_scan() has unlocked that spin lock. Personally I
> prefer to avoid having such subtle dependencies between the async
> scanning code and the context waiting for it to finish.
It's also my preference It's not built in for purpose, I only haven't
noticed it :)
Thanks for the advice.

Tomas


>
> Bart.


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 10:14   ` Tomas Henzl
@ 2012-04-06 13:13     ` Tomas Henzl
  2012-04-06 14:38       ` Bart Van Assche
  0 siblings, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-04-06 13:13 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, Mike Christie

On 04/06/2012 12:14 PM, Tomas Henzl wrote:
> On 04/06/2012 11:39 AM, Bart Van Assche wrote:
>> On 04/05/12 13:58, Tomas Henzl wrote:
>>
>>> When the async scan is protected this way a further protection via ref. count is no more needed
>>> so remove it
>>> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
>>> index 29c4c04..be9e6fe 100644
>>> --- a/drivers/scsi/scsi_scan.c
>>> +++ b/drivers/scsi/scsi_scan.c
>>> @@ -1743,10 +1743,9 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
>>>  
>>>  	data = kmalloc(sizeof(*data), GFP_KERNEL);
>>>  	if (!data)
>>> -		goto err;
>>> -	data->shost = scsi_host_get(shost);
>>> -	if (!data->shost)
>>> -		goto err;
>>> +		return NULL;
>>> +
>>> +	data->shost = shost;
>>>  	init_completion(&data->prev_finished);
>>>  
>>>  	mutex_lock(&shost->scan_mutex);
>> Maybe it's better to leave the scsi_host_get() / scsi_host_put() pair in
>> scsi_prep_async_scan() and scsi_finish_async_scan(). Let's have a look
>> at the following code in scsi_finish_async_scan():
>>
>> 	spin_lock_irqsave(shost->host_lock, flags);
>> 	shost->async_scan = 0;
>> 	spin_unlock_irqrestore(shost->host_lock, flags);
>>
>> If the context that is waiting for shost->async_scan wouldn't lock
>> shost->host_lock after having observed that shost->async_scan became
>> zero then shost->host_lock could already have been freed before
>> scsi_finish_async_scan() has unlocked that spin lock. Personally I
>> prefer to avoid having such subtle dependencies between the async
>> scanning code and the context waiting for it to finish.
Actually that would be worse than to depend on the shost->host_lock       
  while (shost->async_scan)
                msleep(10);
  spin_lock_irqsave(shost->host_lock, flags); /* hi this spinlock is needed for async scan test above */
We can add this comment ^ ,or add locking to the while loop.

Otherwise we will have this in scsi_finish_async_scan
+	spin_lock_irqsave(shost->host_lock, flags);
+	shost->async_scan = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);

	scsi_host_put(shost);	
...}
This means that after  scsi_remove_host is left and the driver rmmoded we call
scsi_host_put which in the end calls scsi_host_dev_release and this function tries to use
the now invalid host_template functions again. 



> It's also my preference It's not built in for purpose, I only haven't
> noticed it :)
> Thanks for the advice.
>
> Tomas
>
>
>> Bart.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 13:13     ` Tomas Henzl
@ 2012-04-06 14:38       ` Bart Van Assche
  2012-04-06 15:32         ` Tomas Henzl
  0 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06 14:38 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, Mike Christie

On 04/06/12 13:13, Tomas Henzl wrote:

> This means that after scsi_remove_host is left and the driver rmmoded

> we call scsi_host_put which in the end calls scsi_host_dev_release
> and this function tries to use the now invalid host_template
> functions again.
There are already several other ways in which the host template can be
accessed after scsi_remove_host() finished:
* Via the scsi_proc_hostdir_rm() call in scsi_host_dev_release().
* From the SCSI error handler thread.
* From inside scsi_dispatch_cmd().

Why isn't the SCSI error handler thread stopped from inside
scsi_remove_host() ? And why isn't scsi_proc_hostdir_rm() invoked from
inside scsi_remove_host() ? Am I missing something ?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06  9:54     ` Tomas Henzl
@ 2012-04-06 15:20       ` James Bottomley
  2012-04-06 16:15         ` Bart Van Assche
  0 siblings, 1 reply; 38+ messages in thread
From: James Bottomley @ 2012-04-06 15:20 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: Mike Christie, Bart Van Assche,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On Fri, 2012-04-06 at 11:54 +0200, Tomas Henzl wrote:
> > Tomas's bug occurs when drivers use scsi_scan_host, use the async scsi
> > device scanning, and then rmmod the LLD while the scan is still in progress.
> >
> > I think a general problem that we might hit similar to Tomas's issue is
> > when scanning from userspace then rmmoding the driver. Maybe that means
> > we need a more generic fix? Or, maybe that could be handled by having
> > scsi_scan() do a try_module_get before scanning.
> I like this idea(try_module_get) it is easy/elegant and it is used in scsi_rescan_device,
> but a scan can take a lot of time and during that time the driver couldn't be removed.
> When a flag in scsi_remove_host is set then the scan can be cancelled, if the user 
> rmmods the driver.

This is my preferred solution too.  The rule for async stuff is either
cancel or wait and since we can't cancel, we need to ensure the wait by
holding the module until the async event has finished.

Since the whole of the host scan must complete, we need to hold the
module across that, but I bet we also need to hold it across user
triggered target scans as well.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 14:38       ` Bart Van Assche
@ 2012-04-06 15:32         ` Tomas Henzl
  0 siblings, 0 replies; 38+ messages in thread
From: Tomas Henzl @ 2012-04-06 15:32 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: 'linux-scsi@vger.kernel.org', Stanislaw Gruszka, Mike Christie

On 04/06/2012 04:38 PM, Bart Van Assche wrote:
> On 04/06/12 13:13, Tomas Henzl wrote:
>
>> This means that after scsi_remove_host is left and the driver rmmoded
>> we call scsi_host_put which in the end calls scsi_host_dev_release
>> and this function tries to use the now invalid host_template
>> functions again.
> There are already several other ways in which the host template can be
> accessed after scsi_remove_host() finished:
> * Via the scsi_proc_hostdir_rm() call in scsi_host_dev_release().
This ^ is actually the case i wrote about in my previous mail.

> * From the SCSI error handler thread.
> * From inside scsi_dispatch_cmd().
>
> Why isn't the SCSI error handler thread stopped from inside
> scsi_remove_host() ? And why isn't scsi_proc_hostdir_rm() invoked from
> inside scsi_remove_host() ? Am I missing something ?
I'm not the right person to answer this, but it might be really so bad
that the only reason is that module unload of a scsi driver is not used often.

Going back to my initial case, resetting the host template to zero in 
scsi_remove_host and using a locking mechanism to protect host template
would also solve the scan problem (and maybe some other too), what do you think?

tomash

>
> Thanks,
>
> Bart.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 15:20       ` James Bottomley
@ 2012-04-06 16:15         ` Bart Van Assche
  2012-04-06 16:35           ` James Bottomley
  0 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06 16:15 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/06/12 15:20, James Bottomley wrote:

> On Fri, 2012-04-06 at 11:54 +0200, Tomas Henzl wrote:
>> I like this idea(try_module_get) it is easy/elegant and it is used in scsi_rescan_device,
>> but a scan can take a lot of time and during that time the driver couldn't be removed.
>> When a flag in scsi_remove_host is set then the scan can be cancelled, if the user 
>> rmmods the driver.
> 
> This is my preferred solution too.  The rule for async stuff is either
> cancel or wait and since we can't cancel, we need to ensure the wait by
> holding the module until the async event has finished.
> 
> Since the whole of the host scan must complete, we need to hold the
> module across that, but I bet we also need to hold it across user
> triggered target scans as well.


As far as I can see the queuecommand call in scsi_dispatch_cmd() can
race with module removal - that call can be triggered while the host
template is being unloaded. I'm not sure though what the best approach
is to fix that race.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 16:15         ` Bart Van Assche
@ 2012-04-06 16:35           ` James Bottomley
  2012-04-06 17:01             ` Bart Van Assche
  0 siblings, 1 reply; 38+ messages in thread
From: James Bottomley @ 2012-04-06 16:35 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On Fri, 2012-04-06 at 16:15 +0000, Bart Van Assche wrote:
> On 04/06/12 15:20, James Bottomley wrote:
> 
> > On Fri, 2012-04-06 at 11:54 +0200, Tomas Henzl wrote:
> >> I like this idea(try_module_get) it is easy/elegant and it is used in scsi_rescan_device,
> >> but a scan can take a lot of time and during that time the driver couldn't be removed.
> >> When a flag in scsi_remove_host is set then the scan can be cancelled, if the user 
> >> rmmods the driver.
> > 
> > This is my preferred solution too.  The rule for async stuff is either
> > cancel or wait and since we can't cancel, we need to ensure the wait by
> > holding the module until the async event has finished.
> > 
> > Since the whole of the host scan must complete, we need to hold the
> > module across that, but I bet we also need to hold it across user
> > triggered target scans as well.
> 
> 
> As far as I can see the queuecommand call in scsi_dispatch_cmd() can
> race with module removal - that call can be triggered while the host
> template is being unloaded. I'm not sure though what the best approach
> is to fix that race.

Um, it's a bit hard to see how.  It's not really possible to trigger
queuecommand except in the initial probe without an open device ... and
opening the device holds the module.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 16:35           ` James Bottomley
@ 2012-04-06 17:01             ` Bart Van Assche
  2012-04-06 17:15               ` James Bottomley
  0 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06 17:01 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/06/12 16:35, James Bottomley wrote:

> On Fri, 2012-04-06 at 16:15 +0000, Bart Van Assche wrote:
>> As far as I can see the queuecommand call in scsi_dispatch_cmd() can
>> race with module removal - that call can be triggered while the host
>> template is being unloaded. I'm not sure though what the best approach
>> is to fix that race.
> 
> Um, it's a bit hard to see how.  It's not really possible to trigger
> queuecommand except in the initial probe without an open device ... and
> opening the device holds the module.


Sorry, but I forgot to mention that it's not just scsi_dispatch_cmd()
that invokes queuecommand via the host template. The SCSI error handler
does that too. As far as I can see there is no protection in the SCSI
error handler against LLD module removal. But maybe I overlooked something.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 17:01             ` Bart Van Assche
@ 2012-04-06 17:15               ` James Bottomley
  2012-04-06 17:59                 ` Bart Van Assche
  2012-04-08 17:38                 ` Bart Van Assche
  0 siblings, 2 replies; 38+ messages in thread
From: James Bottomley @ 2012-04-06 17:15 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On Fri, 2012-04-06 at 17:01 +0000, Bart Van Assche wrote:
> On 04/06/12 16:35, James Bottomley wrote:
> 
> > On Fri, 2012-04-06 at 16:15 +0000, Bart Van Assche wrote:
> >> As far as I can see the queuecommand call in scsi_dispatch_cmd() can
> >> race with module removal - that call can be triggered while the host
> >> template is being unloaded. I'm not sure though what the best approach
> >> is to fix that race.
> > 
> > Um, it's a bit hard to see how.  It's not really possible to trigger
> > queuecommand except in the initial probe without an open device ... and
> > opening the device holds the module.
> 
> 
> Sorry, but I forgot to mention that it's not just scsi_dispatch_cmd()
> that invokes queuecommand via the host template. The SCSI error handler
> does that too. As far as I can see there is no protection in the SCSI
> error handler against LLD module removal. But maybe I overlooked something.

Consider where the command came from: either it's come from startup (the
current problem), operation (in which case the device must be open to
send and receive it) or teardown (which is synchronous).

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06  9:24     ` Bart Van Assche
@ 2012-04-06 17:22       ` Mike Christie
  2012-04-06 18:37         ` Bart Van Assche
  2012-04-11 21:46         ` Mike Christie
  0 siblings, 2 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-06 17:22 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/06/2012 04:24 AM, Bart Van Assche wrote:
> On 04/05/12 21:29, Mike Christie wrote:
> 
>> Tomas's bug occurs when drivers use scsi_scan_host, use the async scsi
>> device scanning, and then rmmod the LLD while the scan is still in progress.
>>
>> I think a general problem that we might hit similar to Tomas's issue is
>> when scanning from userspace then rmmoding the driver. Maybe that means
>> we need a more generic fix? Or, maybe that could be handled by having
>> scsi_scan() do a try_module_get before scanning.
> 
> That suggestion certainly makes sense to me. But Tomas' approach has the
> advantage that it guarantees that scanning will have finished once
> scsi_remove_hosts() returned and hence has less potential for triggering
> race conditions than an approach based on try_module_get().

Tomas's approach works when scsi_scan_host and the async scsi scanning
is used. However, I was saying that I think we have a 2nd problem that
is similar to Tomas's issue but initiated from a different path and will
bypass Tomas's checks. The host async scanning code does not come into
play when scanning from userspace. In the user scan case we could end up
having:

1. A transport class or scsi_sysfs.c initiate a scan.
2. A user could rmmod the LLD.
3. The LLD will call the transport remove host if there is one and then
scsi_remove_host.

Note that for fc_remove_host there are checks to flush the scanning
workqueue but we will bypass the scan workqueue flush checks since for
this case we are scanning from the user thread and not from the host's
workqueue the fc classes normally uses for scanning.
4. scsi_remove_host would move right past Tomas's code, because the user
initiated scan does not set any of those host async bits Tomas's is
checking.
5. The LLD would then get removed and we would hit the same problem
Tomas's described where the scanning code is now accessing invalid
scsi_host_template fields.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 17:15               ` James Bottomley
@ 2012-04-06 17:59                 ` Bart Van Assche
  2012-04-08 17:38                 ` Bart Van Assche
  1 sibling, 0 replies; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06 17:59 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/06/12 17:15, James Bottomley wrote:

> On Fri, 2012-04-06 at 17:01 +0000, Bart Van Assche wrote:
>> Sorry, but I forgot to mention that it's not just scsi_dispatch_cmd()
>> that invokes queuecommand via the host template. The SCSI error handler
>> does that too. As far as I can see there is no protection in the SCSI
>> error handler against LLD module removal. But maybe I overlooked something.
> 
> Consider where the command came from: either it's come from startup (the
> current problem), operation (in which case the device must be open to
> send and receive it) or teardown (which is synchronous).


Thanks, that helps. As you might have noticed I'm trying to understand
all the lifetime and reference counting rules in the SCSI core. I've got
another question about the SCSI error handler: how is it guaranteed that
the 'struct scsi_driver' remains in existence as long as the SCSI error
handler thread is active ? scsi_send_eh_cmnd() needs a pointer to the
scsi_driver structure associated with the command it is sending as part
of the error recovery. But I haven't been able to figure out yet how it
is guaranteed that that scsi_driver structure remains around as long as
the SCSI error handler needs it.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 17:22       ` Mike Christie
@ 2012-04-06 18:37         ` Bart Van Assche
  2012-04-11 21:46         ` Mike Christie
  1 sibling, 0 replies; 38+ messages in thread
From: Bart Van Assche @ 2012-04-06 18:37 UTC (permalink / raw)
  To: Mike Christie
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/06/12 17:22, Mike Christie wrote:

> Tomas's approach works when scsi_scan_host and the async scsi scanning
> is used. However, I was saying that I think we have a 2nd problem that
> is similar to Tomas's issue but initiated from a different path and will
> bypass Tomas's checks. The host async scanning code does not come into
> play when scanning from userspace. In the user scan case we could end up
> having:
> 
> 1. A transport class or scsi_sysfs.c initiate a scan.
> 2. A user could rmmod the LLD.
> 3. The LLD will call the transport remove host if there is one and then
> scsi_remove_host.
> 
> Note that for fc_remove_host there are checks to flush the scanning
> workqueue but we will bypass the scan workqueue flush checks since for
> this case we are scanning from the user thread and not from the host's
> workqueue the fc classes normally uses for scanning.
> 4. scsi_remove_host would move right past Tomas's code, because the user
> initiated scan does not set any of those host async bits Tomas's is
> checking.
> 5. The LLD would then get removed and we would hit the same problem
> Tomas's described where the scanning code is now accessing invalid
> scsi_host_template fields.

Good catch. Has it already been considered to remove the sysfs scan (and
other) attributes explicitly from inside scsi_remove_host() ? As far as
I can see that would prevent this race to occur because it would force
scsi_remove_host() to wait until the scan that was triggered from user
space has finished. It would also prevent that other SCSI callbacks
could be triggered from user space after scsi_remove_host() finished and
before the associated devices are removed entirely.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 17:15               ` James Bottomley
  2012-04-06 17:59                 ` Bart Van Assche
@ 2012-04-08 17:38                 ` Bart Van Assche
  2012-04-11 18:17                   ` Mike Christie
  1 sibling, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-08 17:38 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tomas Henzl, Mike Christie, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/06/12 17:15, James Bottomley wrote:

> Consider where the command came from: either it's come from startup (the
> current problem), operation (in which case the device must be open to
> send and receive it) or teardown (which is synchronous).


And what about e.g. sd_check_events() or sr_check_events() ?

Maybe I should explain why I started looking at the SCSI error handler
in so much detail. While running disk removal tests against ib_srp I
found out that the SCSI error handler thread can invoke the LLD's abort
and reset handlers long after scsi_remove_host() finished. That's why I
came up with this patch: "[PATCH 05/15] ib_srp: Avoid that SCSI error
handling triggers a crash"
(http://www.mail-archive.com/linux-rdma@vger.kernel.org/msg11234.html).
But I'm still wondering whether that's the right approach. Should this
issue be addressed in each LLD or in the SCSI core ?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-08 17:38                 ` Bart Van Assche
@ 2012-04-11 18:17                   ` Mike Christie
  2012-04-11 18:30                     ` Mike Christie
  2012-04-11 19:47                     ` Bart Van Assche
  0 siblings, 2 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-11 18:17 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, Tomas Henzl,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/08/2012 12:38 PM, Bart Van Assche wrote:
> On 04/06/12 17:15, James Bottomley wrote:
> 
>> Consider where the command came from: either it's come from startup (the
>> current problem), operation (in which case the device must be open to
>> send and receive it) or teardown (which is synchronous).
> 
> 
> And what about e.g. sd_check_events() or sr_check_events() ?

Are those setup to run from the open functions, so we will have done a
get on the module (sd_open() -> check_disk_change()) for those.

> 
> Maybe I should explain why I started looking at the SCSI error handler
> in so much detail. While running disk removal tests against ib_srp I
> found out that the SCSI error handler thread can invoke the LLD's abort
> and reset handlers long after scsi_remove_host() finished. That's why I
> came up with this patch: "[PATCH 05/15] ib_srp: Avoid that SCSI error
> handling triggers a crash"
> (http://www.mail-archive.com/linux-rdma@vger.kernel.org/msg11234.html).
> But I'm still wondering whether that's the right approach. Should this
> issue be addressed in each LLD or in the SCSI core ?
> 

I would like scsi core to handle this, but I think right now drivers are
doing work arounds like your patch.

For example FC drivers will call fc_remove_host which sets things up so
no new IO can be sent to the device after that function has completed
and will call some callouts like dev_loss_tmo_callbk to cleanup
outstanding IO. The problem is that when it calls scsi_remove_host that
could cause commands like cache sync from sd.c's shutdown to get failed
(the queuecommand function will call the fc check ready function which
will fail the IO immediately).

iSCSI drivers will call scsi_remove_target, do a iscsi logout and
cleanup outstanding commands if needed, set things up so new IO is
failed (I do not think there should be new IO because scsi_remove_target
should have removed the devices and the iscsi logout/shutdown code made
sure the iscsi queues were empty), then call scsi_remove_host.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-11 18:17                   ` Mike Christie
@ 2012-04-11 18:30                     ` Mike Christie
  2012-04-11 19:47                     ` Bart Van Assche
  1 sibling, 0 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-11 18:30 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, Tomas Henzl,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/11/2012 01:17 PM, Mike Christie wrote:
> For example FC drivers will call fc_remove_host which sets things up so
> no new IO can be sent to the device after that function has completed
> and will call some callouts like dev_loss_tmo_callbk to cleanup
> outstanding IO. The problem is that when it calls scsi_remove_host that

One correction. I meant to write scsi_target_remove above.

fc_remove_host sets the rport state then calls rport_delete_work which
then calls scsi_target_remove and it does callbacks like
dev_loss_tmo_callbk and terminate_rport_io. So scsi_remove_host would
not cause commands like sync cache to fail. That would be
scsi_target_remove. By the time the driver calls scsi_remove_host the
scsi_target_remove calls would have already removed the devices.

Same thing happens though. Because fc_remove_host sets the rport state
to FC_PORTSTATE_DELETED when scsi_remove_target removes the devices and
the sd.c shutdown code is run, IO from that would get failed by the fc
check ready function.


> could cause commands like cache sync from sd.c's shutdown to get failed
> (the queuecommand function will call the fc check ready function which
> will fail the IO immediately).
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-11 18:17                   ` Mike Christie
  2012-04-11 18:30                     ` Mike Christie
@ 2012-04-11 19:47                     ` Bart Van Assche
  2012-04-11 22:28                       ` Mike Christie
  1 sibling, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-04-11 19:47 UTC (permalink / raw)
  To: Mike Christie
  Cc: James Bottomley, Tomas Henzl,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/11/12 18:17, Mike Christie wrote:

> I would like scsi core to handle this, but I think right now drivers are
> doing work arounds like your patch.


I've been wondering whether the following approach would make sense
(haven't tested this yet):
- Let scsi_remove_host() stop the SCSI error handler thread instead of
  keeping the error handler thread around until scsi_host_dev_release()
  gets invoked.
- After blk_cleanup_queue() finished, kill all outstanding SCSI
  requests from inside scsi_remove_device() (those requests that have
  already been passed to the LLD via queuecommand) instead of waiting
  until the SCSI error handler detects a timeout.

An advantage of that approach would be that independent of the context
from which an I/O request is submitted (scanning / user space / ...)
that no new requests would be passed to the SCSI LLD after
scsi_remove_host() has finished. So this approach could be an
alternative for Tomas' patch at the start of this thread. However, a
disadvantage is that this approach will only work fine if the LLD stops
I/O completion notifications before invoking scsi_remove_host(). Several
LLDs seem to do that, but not ib_srp. But that's something Dave and I
can take care of.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-06 17:22       ` Mike Christie
  2012-04-06 18:37         ` Bart Van Assche
@ 2012-04-11 21:46         ` Mike Christie
  1 sibling, 0 replies; 38+ messages in thread
From: Mike Christie @ 2012-04-11 21:46 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org', Stanislaw Gruszka

On 04/06/2012 12:22 PM, Mike Christie wrote:
> 1. A transport class or scsi_sysfs.c initiate a scan.
> 2. A user could rmmod the LLD.
> 3. The LLD will call the transport remove host if there is one and then
> scsi_remove_host.

You do not have to worry about this. The scsi scan mutex and host state
checks will protect against it.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-11 19:47                     ` Bart Van Assche
@ 2012-04-11 22:28                       ` Mike Christie
  2012-04-12 10:48                         ` Bart Van Assche
  0 siblings, 1 reply; 38+ messages in thread
From: Mike Christie @ 2012-04-11 22:28 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, Tomas Henzl,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/11/2012 02:47 PM, Bart Van Assche wrote:
> On 04/11/12 18:17, Mike Christie wrote:
> 
>> I would like scsi core to handle this, but I think right now drivers are
>> doing work arounds like your patch.
> 
> 
> I've been wondering whether the following approach would make sense
> (haven't tested this yet):
> - Let scsi_remove_host() stop the SCSI error handler thread instead of
>   keeping the error handler thread around until scsi_host_dev_release()
>   gets invoked.
> - After blk_cleanup_queue() finished, kill all outstanding SCSI
>   requests from inside scsi_remove_device() (those requests that have
>   already been passed to the LLD via queuecommand) instead of waiting
>   until the SCSI error handler detects a timeout.
> 
> An advantage of that approach would be that independent of the context
> from which an I/O request is submitted (scanning / user space / ...)
> that no new requests would be passed to the SCSI LLD after
> scsi_remove_host() has finished. So this approach could be an
> alternative for Tomas' patch at the start of this thread. However, a

I do not think it solve's Tomas's issue, because if a async scan is
running it could have already completed the scan related IO and is just
about to call the slave_configure callout when the user does a rmmod.
The scsi_remove_host from the rmmod will not see any IOs in flight and
go right along removing the host and rmmod will complete and remove the
module. The scsi_scan.c code could then try to access the
slave_configure callout which is now invalid.


> disadvantage is that this approach will only work fine if the LLD stops
> I/O completion notifications before invoking scsi_remove_host(). Several

I don't think you would want to do that, because you have IO from the
sd_shutdown path that you do want to execute. After the remove/shutdown
callouts have been run then you do not want new IO to be sent to the LLD.

So scsi_remove_host sets the host state to cancel initially. It then
calls scsi_forget_host which will loop over devices and remove them.
That could cause IO to be sent by functions like sd_shutdown.

After the ULD code is run __scsi_remove_device will set the state to
SDEV_DEL and scsi_remvoe_host will then set the state to SHOST_DEL. So
that would prevent new IO from getting queued.

But then is there a race that you were hitting? Were you hitting
something like IO was in the reuqest_queue, the sd_shutdown IO got
queued before those IOs, but then the sd shutdown IO completed and the
other queued IO got sent to the LLD before the sdev state could get set
to SDEV_DEL. So then IO could get sent to the LLD queuecommand when we
did not want it to.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race?
  2012-04-11 22:28                       ` Mike Christie
@ 2012-04-12 10:48                         ` Bart Van Assche
  0 siblings, 0 replies; 38+ messages in thread
From: Bart Van Assche @ 2012-04-12 10:48 UTC (permalink / raw)
  To: Mike Christie
  Cc: James Bottomley, Tomas Henzl,
	'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka

On 04/11/12 22:28, Mike Christie wrote:

> On 04/11/2012 02:47 PM, Bart Van Assche wrote:
>> disadvantage is that this approach will only work fine if the LLD stops
>> I/O completion notifications before invoking scsi_remove_host(). Several
> 
> I don't think you would want to do that, because you have IO from the
> sd_shutdown path that you do want to execute. After the remove/shutdown
> callouts have been run then you do not want new IO to be sent to the LLD.
> 
> So scsi_remove_host sets the host state to cancel initially. It then
> calls scsi_forget_host which will loop over devices and remove them.
> That could cause IO to be sent by functions like sd_shutdown.


So that means that with an operational transport layer it's wrong for a
SCSI LLD to stop processing SCSI commands before scsi_remove_host()
finished ? It looks like several SCSI LLD authors are not aware of this.
I have found several examples of high-profile SCSI LLD drivers in the
kernel tree that cause newly submitted SCSI commands to fail during
kernel module removal even before scsi_remove_host() gets invoked.

> After the ULD code is run __scsi_remove_device will set the state to
> SDEV_DEL and scsi_remove_host will then set the state to SHOST_DEL. So
> that would prevent new IO from getting queued.
> 
> But then is there a race that you were hitting?


scsi_remove_host() can get invoked after the SCSI core has submitted a
request to the LLD via queuecommand() but before the LLD has received
the I/O completion notification that will be generated once that request
finishes. I see three alternatives to handle this:
- The LLD stops I/O completion notifications before invoking
  scsi_remove_host() (which is not correct because it prevents
  sd_shutdown() to send SCSI commands to the device).
- The SCSI core keeps the LLD around long enough until it is sure that
  no new I/O notifications will arrive.
- The SCSI LLD stops I/O completion notifications after having invoked
  scsi_remove_host() and kills all pending SCSI commands before
  continuing with LLD-specific host removal tasks. As far as I can see
  the SCSI core doesn't provide a function yet that would allow an
  LLD to kill all pending requests. Maybe blk_abort_queue() could be
  helpful here.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - rmmod race? try_module_get
  2012-04-05 13:58 [RFC] How to fix an async scan - rmmod race? Tomas Henzl
                   ` (2 preceding siblings ...)
  2012-04-06  9:39 ` Bart Van Assche
@ 2012-04-12 12:48 ` Tomas Henzl
  2012-04-18 16:48   ` [RFC] How to fix an async scan - 'rmmod --wait' race? Tomas Henzl
  3 siblings, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-04-12 12:48 UTC (permalink / raw)
  To: 'linux-scsi@vger.kernel.org'
  Cc: Stanislaw Gruszka, Mike Christie, Bart Van Assche

Hi,

the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
so the thread could be aborted prematurely. 

This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
and when this were not valid I think and oops in try_module_get will follow. It seems to me
that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..

This, and the fact that the initial patch can ends the scan thread in the middle (I hope this is safe?),
makes me prefer the original patch.

A potential fix for a scan invoked from a different location, eg. a user via syfs is maybe also needed, but
can be handled separately. 

Tomas

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 29c4c04..6cf8df3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1839,9 +1839,13 @@ static int do_scan_async(void *_data)
 {
 	struct async_scan_data *data = _data;
 	struct Scsi_Host *shost = data->shost;
+	struct module *mod = shost->hostt->module;
 
 	do_scsi_scan_host(shost);
 	scsi_finish_async_scan(data);
+
+	module_put(mod);
+
 	return 0;
 }
 
@@ -1853,16 +1857,22 @@ void scsi_scan_host(struct Scsi_Host *shost)
 {
 	struct task_struct *p;
 	struct async_scan_data *data;
+	struct module *mod;
 
 	if (strncmp(scsi_scan_type, "none", 4) == 0)
 		return;
 	if (scsi_autopm_get_host(shost) < 0)
 		return;
 
+	mod = shost->hostt->module;
+	if (!try_module_get(mod)) /* module_put is called from do_scan_async */
+		return;
+
 	data = scsi_prep_async_scan(shost);
 	if (!data) {
 		do_scsi_scan_host(shost);
 		scsi_autopm_put_host(shost);
+		module_put(mod);
 		return;
 	}
 



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-04-12 12:48 ` [RFC] How to fix an async scan - rmmod race? try_module_get Tomas Henzl
@ 2012-04-18 16:48   ` Tomas Henzl
  2012-04-18 18:18     ` Bart Van Assche
  2012-05-17  8:42     ` James Bottomley
  0 siblings, 2 replies; 38+ messages in thread
From: Tomas Henzl @ 2012-04-18 16:48 UTC (permalink / raw)
  To: 'linux-scsi@vger.kernel.org'
  Cc: Stanislaw Gruszka, Mike Christie, Bart Van Assche, stefanr

On 04/12/2012 02:48 PM, Tomas Henzl wrote:
> Hi,
>
> the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
> so the thread could be aborted prematurely. 
>
> This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
> The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
> and when this were not valid I think and oops in try_module_get will follow. It seems to me
> that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..
Actually I did some testing and it is not so stable as it should be - after 'modprobe mpt2sas && rmmod --wait mpt2sas'
I sometimes see:

[  215.126197] mpt2sas version 12.100.00.00 loaded                                                                                                                                         
[  215.127509] scsi8 : Fusion MPT SAS Host                                                                                                                                                 
[  215.128058] mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (6096496 kB)                                                                                                   
[  215.128167] mpt2sas 0000:03:00.0: irq 69 for MSI/MSI-X
[  215.128190] mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 69
[  215.128192] mpt2sas0: iomem(0x00000000f7040000), mapped(0xffffc90006010000), size(16384)
[  215.128194] mpt2sas0: ioport(0x000000000000d000), size(256)
[  215.241249] mpt2sas0: Allocated physical memory: size(3988 kB)
[  215.241253] mpt2sas0: Current Controller Queue Depth(1753), Max Controller Queue Depth(2000)
[  215.241256] mpt2sas0: Scatter Gather Elements per IO(128)
[  215.300281] mpt2sas0: LSISAS2008: FWVersion(11.00.00.00), ChipRevision(0x01), BiosVersion(07.23.01.00)
[  215.300284] mpt2sas0: Protocol=(Initiator), Capabilities=(Raid,TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ)
[  215.300602] mpt2sas0: sending port enable !!
[  216.854720] mpt2sas0: host_add: handle(0x0001), sas_addr(0x500605b0006b8530), phys(8)
[  222.978469] mpt2sas0: port enable: SUCCESS
[  222.980104] scsi 8:0:0:0: Direct-Access     SEAGATE  ST9146852SS      0005 PQ: 0 ANSI: 5
[  222.980113] scsi 8:0:0:0: SSP: handle(0x0009), sas_addr(0x5000c5001ac10319), phy(2), device_name(0x5000c5001ac10318)
[  222.980117] scsi 8:0:0:0: SSP: enclosure_logical_id(0x500605b0006b8530), slot(2)
[  222.980121] scsi 8:0:0:0: qdepth(254), tagged(1), simple(0), ordered(0), scsi_level(6), cmd_que(1)
[  222.981149] mpt2sas version 12.100.00.00 unloading
[  222.981467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
[  222.981483] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
[  222.981499] PGD 0 
[  222.981507] Oops: 0000 [#1] SMP 
[  222.981518] CPU 0 
[  222.981522] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio ip6t_REJECT cxgb4i cxgb4 cxgb3i
[  222.981571] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
[  222.981583]  libcxgbi cxgb3 nf_conntrack_ipv6 nf_defrag_ipv6 mdio nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep i7core_edac edac_core snd_seq snd_seq_device hp_wmi iTCO_wdt iTCO_vendor_support snd_pcm snd_timer snd microcode soundcore snd_page_alloc sparse_keymap serio_raw tg3 rfkill uinput sunrpc firewire_ohci firewire_core crc_itu_t scsi_transport_sas raid_class nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
[  222.981790] 
[  222.981796] Pid: 1311, comm: scsi_scan_8 Not tainted 3.3.0 #21 Hewlett-Packard HP Z400 Workstation/0B4Ch
[  222.981815] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
[  222.981829] RSP: 0018:ffff8801929a7ce0  EFLAGS: 00010246
[  222.981837] RAX: ffff880190c65810 RBX: ffff880190d5a438 RCX: 0000000000000000
[  222.981846] RDX: ffff880199034c60 RSI: ffff880192a81018 RDI: ffff880190d5a438
[  222.981854] RBP: ffff8801929a7d10 R08: ffff880199034c60 R09: ffff880195516d80
[  222.981863] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
[  222.981872] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880190d5a400
[  222.981881] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
[  222.981891] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  222.981899] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
[  222.981913] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  222.981922] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  222.981932] Process scsi_scan_8 (pid: 1311, threadinfo ffff8801929a6000, task ffff88018b6e8000)
[  222.981941] Stack:
[  222.981946]  ffff8801929a7d30 ffffffff810583a5 ffff8801929a7d10 ffff880190d5a438
[  222.981965] mpt2sas0: sending diag reset !!
[  222.983072]  ffff880190c65810 0000000000000000 ffff8801929a7d60 ffffffff812c2e6e
[  222.983644]  ffff8801929a7d66 0044b82fa09b5a53 ffff8801929a7dd0 ffff880190d5a438
[  222.984215] Call Trace:
[  222.984776]  [<ffffffff810583a5>] ? console_unlock+0x1f5/0x270
[  222.985334]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
[  222.985888]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
[  222.986461]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
[  222.987050]  [<ffffffff813c8288>] scsi_sysfs_add_sdev+0x198/0x340
[  222.987643]  [<ffffffff813c6b24>] do_scan_async+0x84/0x170
[  222.988241]  [<ffffffff813c6aa0>] ? do_scsi_scan_host+0xa0/0xa0
[  222.988845]  [<ffffffff81079c63>] kthread+0x93/0xa0
[  222.989443]  [<ffffffff815fd1e4>] kernel_thread_helper+0x4/0x10
[  222.990046]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
[  222.990655]  [<ffffffff815fd1e0>] ? gs_change+0x13/0x13
[  222.991256] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
[  222.992012] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
[  222.992626]  RSP <ffff8801929a7ce0>
[  222.993265] CR2: 0000000000000079
[  222.993943] ---[ end trace 024b7be18310b205 ]---
[  224.049467] mpt2sas0: diag reset: SUCCESS

This means that the module protection with try_module_get doesn't work, the driver removal starts again
in the middle of a scan.
>From man rmmod the parameter wait means
"Normally, rmmod will refuse to unload modules which are in use. With this option, rmmod will isolate the module, and wait until the module is no  longer  used.
Nothing new will be able to use the module, but it's up to you to make sure the current users eventually finish with it."
This^ means that try_module_get will fail after a 'rmmod --wait' is invoked.

And in scsi.c actually is code which ignores the return value of try_module_get, it has been added here:

85b6c720b0931101c8bcc3a5abdc2b8514b0fb4b [SCSI] sd: fix cache flushing on module removal (and individual device removal)
+       /* We can fail this if we're doing SCSI operations
+        * from module exit (like cache flush) */
+       try_module_get(sdev->host->hostt->module);


I verified this theory with this debug patch

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c                                                                                                                                     
index 07322ec..9c3827c 100644                                                                                                                                                              
--- a/drivers/scsi/scsi.c                                                                                                                                                                  
+++ b/drivers/scsi/scsi.c                                                                                                                                                                  
@@ -1070,9 +1070,9 @@ int scsi_device_get(struct scsi_device *sdev)                                                                                                                        
                return -ENXIO;
        if (!get_device(&sdev->sdev_gendev))
                return -ENXIO;
-       /* We can fail this if we're doing SCSI operations
-        * from module exit (like cache flush) */
-       try_module_get(sdev->host->hostt->module);
+
+        if (!try_module_get(sdev->host->hostt->module))
+                sdev->mod_counter++;
 
        return 0;
 }
@@ -1091,10 +1091,10 @@ void scsi_device_put(struct scsi_device *sdev)
 #ifdef CONFIG_MODULE_UNLOAD
        struct module *module = sdev->host->hostt->module;
 
-       /* The module refcount will be zero if scsi_device_get()
-        * was called from a module removal routine */
-       if (module && module_refcount(module) != 0)
-               module_put(module);
+        if (sdev->mod_counter)
+                sdev->mod_counter--;
+        else
+                module_put(module);
 #endif
        put_device(&sdev->sdev_gendev);
 }

This patch^ can't solve all problems associated with this problem, I have invented a few more approaches, all of them
are ugly. Probably adding a comment 'This  option  can be extremely dangerous:' to the rmmod man is the easiest for now.
And long term goal were not to use try_module_get in scsi_device_get ?



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-04-18 16:48   ` [RFC] How to fix an async scan - 'rmmod --wait' race? Tomas Henzl
@ 2012-04-18 18:18     ` Bart Van Assche
  2012-05-17  8:42     ` James Bottomley
  1 sibling, 0 replies; 38+ messages in thread
From: Bart Van Assche @ 2012-04-18 18:18 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On 04/18/12 16:48, Tomas Henzl wrote:

> And long term goal were not to use try_module_get in scsi_device_get ?


With a properly implemented eh_abort_handler the patch below should
avoid that error handler functions get invoked after scsi_remove_host()
finished. This patch needs further testing though - a similar patch has
been reverted in the beginning of 2011 (see also commit
09c9d4c9b6a2b5909ae3c6265e4cd3820b636863). So I'm not entirely sure
whether the patch below is the right approach.

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4aa41d1..c9f6000 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1691,6 +1691,7 @@ struct request_queue *scsi_alloc_queue(struct
scsi_device *sdev)
 void scsi_free_queue(struct request_queue *q)
 {
 	blk_cleanup_queue(q);
+	blk_abort_queue(q);
 }

 /*
-- 
1.7.7


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-04-18 16:48   ` [RFC] How to fix an async scan - 'rmmod --wait' race? Tomas Henzl
  2012-04-18 18:18     ` Bart Van Assche
@ 2012-05-17  8:42     ` James Bottomley
  2012-05-17  8:55       ` Bart Van Assche
  1 sibling, 1 reply; 38+ messages in thread
From: James Bottomley @ 2012-05-17  8:42 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, Bart Van Assche, stefanr

On Wed, 2012-04-18 at 18:48 +0200, Tomas Henzl wrote:
> On 04/12/2012 02:48 PM, Tomas Henzl wrote:
> > Hi,
> >
> > the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
> > so the thread could be aborted prematurely. 
> >
> > This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
> > The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
> > and when this were not valid I think and oops in try_module_get will follow. It seems to me
> > that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..
> Actually I did some testing and it is not so stable as it should be - after 'modprobe mpt2sas && rmmod --wait mpt2sas'
> I sometimes see:
> 
> [  215.126197] mpt2sas version 12.100.00.00 loaded                                                                                                                                         
> [  215.127509] scsi8 : Fusion MPT SAS Host                                                                                                                                                 
> [  215.128058] mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (6096496 kB)                                                                                                   
> [  215.128167] mpt2sas 0000:03:00.0: irq 69 for MSI/MSI-X
> [  215.128190] mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 69
> [  215.128192] mpt2sas0: iomem(0x00000000f7040000), mapped(0xffffc90006010000), size(16384)
> [  215.128194] mpt2sas0: ioport(0x000000000000d000), size(256)
> [  215.241249] mpt2sas0: Allocated physical memory: size(3988 kB)
> [  215.241253] mpt2sas0: Current Controller Queue Depth(1753), Max Controller Queue Depth(2000)
> [  215.241256] mpt2sas0: Scatter Gather Elements per IO(128)
> [  215.300281] mpt2sas0: LSISAS2008: FWVersion(11.00.00.00), ChipRevision(0x01), BiosVersion(07.23.01.00)
> [  215.300284] mpt2sas0: Protocol=(Initiator), Capabilities=(Raid,TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ)
> [  215.300602] mpt2sas0: sending port enable !!
> [  216.854720] mpt2sas0: host_add: handle(0x0001), sas_addr(0x500605b0006b8530), phys(8)
> [  222.978469] mpt2sas0: port enable: SUCCESS
> [  222.980104] scsi 8:0:0:0: Direct-Access     SEAGATE  ST9146852SS      0005 PQ: 0 ANSI: 5
> [  222.980113] scsi 8:0:0:0: SSP: handle(0x0009), sas_addr(0x5000c5001ac10319), phy(2), device_name(0x5000c5001ac10318)
> [  222.980117] scsi 8:0:0:0: SSP: enclosure_logical_id(0x500605b0006b8530), slot(2)
> [  222.980121] scsi 8:0:0:0: qdepth(254), tagged(1), simple(0), ordered(0), scsi_level(6), cmd_que(1)
> [  222.981149] mpt2sas version 12.100.00.00 unloading
> [  222.981467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
> [  222.981483] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> [  222.981499] PGD 0 
> [  222.981507] Oops: 0000 [#1] SMP 
> [  222.981518] CPU 0 
> [  222.981522] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio ip6t_REJECT cxgb4i cxgb4 cxgb3i
> [  222.981571] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
> [  222.981583]  libcxgbi cxgb3 nf_conntrack_ipv6 nf_defrag_ipv6 mdio nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep i7core_edac edac_core snd_seq snd_seq_device hp_wmi iTCO_wdt iTCO_vendor_support snd_pcm snd_timer snd microcode soundcore snd_page_alloc sparse_keymap serio_raw tg3 rfkill uinput sunrpc firewire_ohci firewire_core crc_itu_t scsi_transport_sas raid_class nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
> [  222.981790] 
> [  222.981796] Pid: 1311, comm: scsi_scan_8 Not tainted 3.3.0 #21 Hewlett-Packard HP Z400 Workstation/0B4Ch
> [  222.981815] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> [  222.981829] RSP: 0018:ffff8801929a7ce0  EFLAGS: 00010246
> [  222.981837] RAX: ffff880190c65810 RBX: ffff880190d5a438 RCX: 0000000000000000
> [  222.981846] RDX: ffff880199034c60 RSI: ffff880192a81018 RDI: ffff880190d5a438
> [  222.981854] RBP: ffff8801929a7d10 R08: ffff880199034c60 R09: ffff880195516d80
> [  222.981863] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
> [  222.981872] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880190d5a400
> [  222.981881] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
> [  222.981891] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [  222.981899] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
> [  222.981913] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [  222.981922] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [  222.981932] Process scsi_scan_8 (pid: 1311, threadinfo ffff8801929a6000, task ffff88018b6e8000)
> [  222.981941] Stack:
> [  222.981946]  ffff8801929a7d30 ffffffff810583a5 ffff8801929a7d10 ffff880190d5a438
> [  222.981965] mpt2sas0: sending diag reset !!
> [  222.983072]  ffff880190c65810 0000000000000000 ffff8801929a7d60 ffffffff812c2e6e
> [  222.983644]  ffff8801929a7d66 0044b82fa09b5a53 ffff8801929a7dd0 ffff880190d5a438
> [  222.984215] Call Trace:
> [  222.984776]  [<ffffffff810583a5>] ? console_unlock+0x1f5/0x270
> [  222.985334]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
> [  222.985888]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
> [  222.986461]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
> [  222.987050]  [<ffffffff813c8288>] scsi_sysfs_add_sdev+0x198/0x340
> [  222.987643]  [<ffffffff813c6b24>] do_scan_async+0x84/0x170
> [  222.988241]  [<ffffffff813c6aa0>] ? do_scsi_scan_host+0xa0/0xa0
> [  222.988845]  [<ffffffff81079c63>] kthread+0x93/0xa0
> [  222.989443]  [<ffffffff815fd1e4>] kernel_thread_helper+0x4/0x10
> [  222.990046]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
> [  222.990655]  [<ffffffff815fd1e0>] ? gs_change+0x13/0x13
> [  222.991256] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
> [  222.992012] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> [  222.992626]  RSP <ffff8801929a7ce0>
> [  222.993265] CR2: 0000000000000079
> [  222.993943] ---[ end trace 024b7be18310b205 ]---
> [  224.049467] mpt2sas0: diag reset: SUCCESS
> 
> This means that the module protection with try_module_get doesn't work, the driver removal starts again
> in the middle of a scan.

Not necessarily; it means that the remove proceeded too far before it
got to the sync point, which is the scan mutex acquisition in
scsi_remove_host().  The fix is to make sure outstanding async work is
completed before beginning the removal.  That can't really be done in
SCSI, but it looks like this might be the trick.

James

---
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index 3ec3896..246e652 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/async.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
  */
 void driver_unregister(struct device_driver *drv)
 {
+	/*
+	 * make sure all async bits that may be running have completed the way
+	 * this works is that try_module_get() now fails, so new async work
+	 * should take a reference to the module which now fails, so it should
+	 * be safe to remove the driver.
+	 */
+	async_synchronize_full();
 	if (!drv || !drv->p) {
 		WARN(1, "Unexpected driver unregister!\n");
 		return;



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-17  8:42     ` James Bottomley
@ 2012-05-17  8:55       ` Bart Van Assche
  2012-05-17  9:01         ` James Bottomley
  0 siblings, 1 reply; 38+ messages in thread
From: Bart Van Assche @ 2012-05-17  8:55 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On 05/17/12 08:42, James Bottomley wrote:

> On Wed, 2012-04-18 at 18:48 +0200, Tomas Henzl wrote:
>> On 04/12/2012 02:48 PM, Tomas Henzl wrote:
>>> Hi,
>>>
>>> the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
>>> so the thread could be aborted prematurely. 
>>>
>>> This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
>>> The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
>>> and when this were not valid I think and oops in try_module_get will follow. It seems to me
>>> that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..
>> Actually I did some testing and it is not so stable as it should be - after 'modprobe mpt2sas && rmmod --wait mpt2sas'
>> I sometimes see:
>>
>> [  215.126197] mpt2sas version 12.100.00.00 loaded                                                                                                                                         
>> [  215.127509] scsi8 : Fusion MPT SAS Host                                                                                                                                                 
>> [  215.128058] mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (6096496 kB)                                                                                                   
>> [  215.128167] mpt2sas 0000:03:00.0: irq 69 for MSI/MSI-X
>> [  215.128190] mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 69
>> [  215.128192] mpt2sas0: iomem(0x00000000f7040000), mapped(0xffffc90006010000), size(16384)
>> [  215.128194] mpt2sas0: ioport(0x000000000000d000), size(256)
>> [  215.241249] mpt2sas0: Allocated physical memory: size(3988 kB)
>> [  215.241253] mpt2sas0: Current Controller Queue Depth(1753), Max Controller Queue Depth(2000)
>> [  215.241256] mpt2sas0: Scatter Gather Elements per IO(128)
>> [  215.300281] mpt2sas0: LSISAS2008: FWVersion(11.00.00.00), ChipRevision(0x01), BiosVersion(07.23.01.00)
>> [  215.300284] mpt2sas0: Protocol=(Initiator), Capabilities=(Raid,TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ)
>> [  215.300602] mpt2sas0: sending port enable !!
>> [  216.854720] mpt2sas0: host_add: handle(0x0001), sas_addr(0x500605b0006b8530), phys(8)
>> [  222.978469] mpt2sas0: port enable: SUCCESS
>> [  222.980104] scsi 8:0:0:0: Direct-Access     SEAGATE  ST9146852SS      0005 PQ: 0 ANSI: 5
>> [  222.980113] scsi 8:0:0:0: SSP: handle(0x0009), sas_addr(0x5000c5001ac10319), phy(2), device_name(0x5000c5001ac10318)
>> [  222.980117] scsi 8:0:0:0: SSP: enclosure_logical_id(0x500605b0006b8530), slot(2)
>> [  222.980121] scsi 8:0:0:0: qdepth(254), tagged(1), simple(0), ordered(0), scsi_level(6), cmd_que(1)
>> [  222.981149] mpt2sas version 12.100.00.00 unloading
>> [  222.981467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
>> [  222.981483] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>> [  222.981499] PGD 0 
>> [  222.981507] Oops: 0000 [#1] SMP 
>> [  222.981518] CPU 0 
>> [  222.981522] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio ip6t_REJECT cxgb4i cxgb4 cxgb3i
>> [  222.981571] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
>> [  222.981583]  libcxgbi cxgb3 nf_conntrack_ipv6 nf_defrag_ipv6 mdio nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep i7core_edac edac_core snd_seq snd_seq_device hp_wmi iTCO_wdt iTCO_vendor_support snd_pcm snd_timer snd microcode soundcore snd_page_alloc sparse_keymap serio_raw tg3 rfkill uinput sunrpc firewire_ohci firewire_core crc_itu_t scsi_transport_sas raid_class nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
>> [  222.981790] 
>> [  222.981796] Pid: 1311, comm: scsi_scan_8 Not tainted 3.3.0 #21 Hewlett-Packard HP Z400 Workstation/0B4Ch
>> [  222.981815] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>> [  222.981829] RSP: 0018:ffff8801929a7ce0  EFLAGS: 00010246
>> [  222.981837] RAX: ffff880190c65810 RBX: ffff880190d5a438 RCX: 0000000000000000
>> [  222.981846] RDX: ffff880199034c60 RSI: ffff880192a81018 RDI: ffff880190d5a438
>> [  222.981854] RBP: ffff8801929a7d10 R08: ffff880199034c60 R09: ffff880195516d80
>> [  222.981863] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
>> [  222.981872] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880190d5a400
>> [  222.981881] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
>> [  222.981891] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> [  222.981899] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
>> [  222.981913] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> [  222.981922] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>> [  222.981932] Process scsi_scan_8 (pid: 1311, threadinfo ffff8801929a6000, task ffff88018b6e8000)
>> [  222.981941] Stack:
>> [  222.981946]  ffff8801929a7d30 ffffffff810583a5 ffff8801929a7d10 ffff880190d5a438
>> [  222.981965] mpt2sas0: sending diag reset !!
>> [  222.983072]  ffff880190c65810 0000000000000000 ffff8801929a7d60 ffffffff812c2e6e
>> [  222.983644]  ffff8801929a7d66 0044b82fa09b5a53 ffff8801929a7dd0 ffff880190d5a438
>> [  222.984215] Call Trace:
>> [  222.984776]  [<ffffffff810583a5>] ? console_unlock+0x1f5/0x270
>> [  222.985334]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
>> [  222.985888]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
>> [  222.986461]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
>> [  222.987050]  [<ffffffff813c8288>] scsi_sysfs_add_sdev+0x198/0x340
>> [  222.987643]  [<ffffffff813c6b24>] do_scan_async+0x84/0x170
>> [  222.988241]  [<ffffffff813c6aa0>] ? do_scsi_scan_host+0xa0/0xa0
>> [  222.988845]  [<ffffffff81079c63>] kthread+0x93/0xa0
>> [  222.989443]  [<ffffffff815fd1e4>] kernel_thread_helper+0x4/0x10
>> [  222.990046]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
>> [  222.990655]  [<ffffffff815fd1e0>] ? gs_change+0x13/0x13
>> [  222.991256] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
>> [  222.992012] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>> [  222.992626]  RSP <ffff8801929a7ce0>
>> [  222.993265] CR2: 0000000000000079
>> [  222.993943] ---[ end trace 024b7be18310b205 ]---
>> [  224.049467] mpt2sas0: diag reset: SUCCESS
>>
>> This means that the module protection with try_module_get doesn't work, the driver removal starts again
>> in the middle of a scan.
> 
> Not necessarily; it means that the remove proceeded too far before it
> got to the sync point, which is the scan mutex acquisition in
> scsi_remove_host().  The fix is to make sure outstanding async work is
> completed before beginning the removal.  That can't really be done in
> SCSI, but it looks like this might be the trick.
> 
> James
> 
> ---
> diff --git a/drivers/base/driver.c b/drivers/base/driver.c
> index 3ec3896..246e652 100644
> --- a/drivers/base/driver.c
> +++ b/drivers/base/driver.c
> @@ -10,6 +10,7 @@
>   *
>   */
>  
> +#include <linux/async.h>
>  #include <linux/device.h>
>  #include <linux/module.h>
>  #include <linux/errno.h>
> @@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
>   */
>  void driver_unregister(struct device_driver *drv)
>  {
> +	/*
> +	 * make sure all async bits that may be running have completed the way
> +	 * this works is that try_module_get() now fails, so new async work
> +	 * should take a reference to the module which now fails, so it should
> +	 * be safe to remove the driver.
> +	 */
> +	async_synchronize_full();
>  	if (!drv || !drv->p) {
>  		WARN(1, "Unexpected driver unregister!\n");
>  		return;


I'm hoping there exists another way to fix this. A patch like the above
will cause driver_unregister() to take forever if the rate at which new
devices are added is high enough. As far as I understand
async_synchronize_full() the time during which this function waits is
extended every time scanning of a newly added device starts.

Bart.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-17  8:55       ` Bart Van Assche
@ 2012-05-17  9:01         ` James Bottomley
  2012-05-17 14:51           ` Tomas Henzl
  0 siblings, 1 reply; 38+ messages in thread
From: James Bottomley @ 2012-05-17  9:01 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Tomas Henzl, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On Thu, 2012-05-17 at 08:55 +0000, Bart Van Assche wrote:
> On 05/17/12 08:42, James Bottomley wrote:
> 
> > On Wed, 2012-04-18 at 18:48 +0200, Tomas Henzl wrote:
> >> On 04/12/2012 02:48 PM, Tomas Henzl wrote:
> >>> Hi,
> >>>
> >>> the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
> >>> so the thread could be aborted prematurely. 
> >>>
> >>> This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
> >>> The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
> >>> and when this were not valid I think and oops in try_module_get will follow. It seems to me
> >>> that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..
> >> Actually I did some testing and it is not so stable as it should be - after 'modprobe mpt2sas && rmmod --wait mpt2sas'
> >> I sometimes see:
> >>
> >> [  215.126197] mpt2sas version 12.100.00.00 loaded                                                                                                                                         
> >> [  215.127509] scsi8 : Fusion MPT SAS Host                                                                                                                                                 
> >> [  215.128058] mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (6096496 kB)                                                                                                   
> >> [  215.128167] mpt2sas 0000:03:00.0: irq 69 for MSI/MSI-X
> >> [  215.128190] mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 69
> >> [  215.128192] mpt2sas0: iomem(0x00000000f7040000), mapped(0xffffc90006010000), size(16384)
> >> [  215.128194] mpt2sas0: ioport(0x000000000000d000), size(256)
> >> [  215.241249] mpt2sas0: Allocated physical memory: size(3988 kB)
> >> [  215.241253] mpt2sas0: Current Controller Queue Depth(1753), Max Controller Queue Depth(2000)
> >> [  215.241256] mpt2sas0: Scatter Gather Elements per IO(128)
> >> [  215.300281] mpt2sas0: LSISAS2008: FWVersion(11.00.00.00), ChipRevision(0x01), BiosVersion(07.23.01.00)
> >> [  215.300284] mpt2sas0: Protocol=(Initiator), Capabilities=(Raid,TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ)
> >> [  215.300602] mpt2sas0: sending port enable !!
> >> [  216.854720] mpt2sas0: host_add: handle(0x0001), sas_addr(0x500605b0006b8530), phys(8)
> >> [  222.978469] mpt2sas0: port enable: SUCCESS
> >> [  222.980104] scsi 8:0:0:0: Direct-Access     SEAGATE  ST9146852SS      0005 PQ: 0 ANSI: 5
> >> [  222.980113] scsi 8:0:0:0: SSP: handle(0x0009), sas_addr(0x5000c5001ac10319), phy(2), device_name(0x5000c5001ac10318)
> >> [  222.980117] scsi 8:0:0:0: SSP: enclosure_logical_id(0x500605b0006b8530), slot(2)
> >> [  222.980121] scsi 8:0:0:0: qdepth(254), tagged(1), simple(0), ordered(0), scsi_level(6), cmd_que(1)
> >> [  222.981149] mpt2sas version 12.100.00.00 unloading
> >> [  222.981467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
> >> [  222.981483] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> >> [  222.981499] PGD 0 
> >> [  222.981507] Oops: 0000 [#1] SMP 
> >> [  222.981518] CPU 0 
> >> [  222.981522] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio ip6t_REJECT cxgb4i cxgb4 cxgb3i
> >> [  222.981571] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
> >> [  222.981583]  libcxgbi cxgb3 nf_conntrack_ipv6 nf_defrag_ipv6 mdio nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep i7core_edac edac_core snd_seq snd_seq_device hp_wmi iTCO_wdt iTCO_vendor_support snd_pcm snd_timer snd microcode soundcore snd_page_alloc sparse_keymap serio_raw tg3 rfkill uinput sunrpc firewire_ohci firewire_core crc_itu_t scsi_transport_sas raid_class nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
> >> [  222.981790] 
> >> [  222.981796] Pid: 1311, comm: scsi_scan_8 Not tainted 3.3.0 #21 Hewlett-Packard HP Z400 Workstation/0B4Ch
> >> [  222.981815] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> >> [  222.981829] RSP: 0018:ffff8801929a7ce0  EFLAGS: 00010246
> >> [  222.981837] RAX: ffff880190c65810 RBX: ffff880190d5a438 RCX: 0000000000000000
> >> [  222.981846] RDX: ffff880199034c60 RSI: ffff880192a81018 RDI: ffff880190d5a438
> >> [  222.981854] RBP: ffff8801929a7d10 R08: ffff880199034c60 R09: ffff880195516d80
> >> [  222.981863] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
> >> [  222.981872] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880190d5a400
> >> [  222.981881] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
> >> [  222.981891] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> [  222.981899] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
> >> [  222.981913] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> >> [  222.981922] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> >> [  222.981932] Process scsi_scan_8 (pid: 1311, threadinfo ffff8801929a6000, task ffff88018b6e8000)
> >> [  222.981941] Stack:
> >> [  222.981946]  ffff8801929a7d30 ffffffff810583a5 ffff8801929a7d10 ffff880190d5a438
> >> [  222.981965] mpt2sas0: sending diag reset !!
> >> [  222.983072]  ffff880190c65810 0000000000000000 ffff8801929a7d60 ffffffff812c2e6e
> >> [  222.983644]  ffff8801929a7d66 0044b82fa09b5a53 ffff8801929a7dd0 ffff880190d5a438
> >> [  222.984215] Call Trace:
> >> [  222.984776]  [<ffffffff810583a5>] ? console_unlock+0x1f5/0x270
> >> [  222.985334]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
> >> [  222.985888]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
> >> [  222.986461]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
> >> [  222.987050]  [<ffffffff813c8288>] scsi_sysfs_add_sdev+0x198/0x340
> >> [  222.987643]  [<ffffffff813c6b24>] do_scan_async+0x84/0x170
> >> [  222.988241]  [<ffffffff813c6aa0>] ? do_scsi_scan_host+0xa0/0xa0
> >> [  222.988845]  [<ffffffff81079c63>] kthread+0x93/0xa0
> >> [  222.989443]  [<ffffffff815fd1e4>] kernel_thread_helper+0x4/0x10
> >> [  222.990046]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
> >> [  222.990655]  [<ffffffff815fd1e0>] ? gs_change+0x13/0x13
> >> [  222.991256] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
> >> [  222.992012] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> >> [  222.992626]  RSP <ffff8801929a7ce0>
> >> [  222.993265] CR2: 0000000000000079
> >> [  222.993943] ---[ end trace 024b7be18310b205 ]---
> >> [  224.049467] mpt2sas0: diag reset: SUCCESS
> >>
> >> This means that the module protection with try_module_get doesn't work, the driver removal starts again
> >> in the middle of a scan.
> > 
> > Not necessarily; it means that the remove proceeded too far before it
> > got to the sync point, which is the scan mutex acquisition in
> > scsi_remove_host().  The fix is to make sure outstanding async work is
> > completed before beginning the removal.  That can't really be done in
> > SCSI, but it looks like this might be the trick.
> > 
> > James
> > 
> > ---
> > diff --git a/drivers/base/driver.c b/drivers/base/driver.c
> > index 3ec3896..246e652 100644
> > --- a/drivers/base/driver.c
> > +++ b/drivers/base/driver.c
> > @@ -10,6 +10,7 @@
> >   *
> >   */
> >  
> > +#include <linux/async.h>
> >  #include <linux/device.h>
> >  #include <linux/module.h>
> >  #include <linux/errno.h>
> > @@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
> >   */
> >  void driver_unregister(struct device_driver *drv)
> >  {
> > +	/*
> > +	 * make sure all async bits that may be running have completed the way
> > +	 * this works is that try_module_get() now fails, so new async work
> > +	 * should take a reference to the module which now fails, so it should
> > +	 * be safe to remove the driver.
> > +	 */
> > +	async_synchronize_full();
> >  	if (!drv || !drv->p) {
> >  		WARN(1, "Unexpected driver unregister!\n");
> >  		return;
> 
> 
> I'm hoping there exists another way to fix this. A patch like the above
> will cause driver_unregister() to take forever if the rate at which new
> devices are added is high enough. As far as I understand
> async_synchronize_full() the time during which this function waits is
> extended every time scanning of a newly added device starts.

module removal isn't really supposed to be a common occurrence.  Rusty
even argued it's impossible to fix the races, which is why there's a
kernel option to disallow it.  I don't think inserting a checkpoint in
the async domain for module removal is unreasonable.

If it shows up as unreasonable in practise, we can label all the async
work by THIS_MODULE and implement async_synchronize_module(THIS_MODULE)
which would limit it, but I don't really think there'll be a huge
problem.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-17  9:01         ` James Bottomley
@ 2012-05-17 14:51           ` Tomas Henzl
  2012-05-22 10:05             ` James Bottomley
  0 siblings, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-05-17 14:51 UTC (permalink / raw)
  To: James Bottomley
  Cc: Bart Van Assche, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On 05/17/2012 11:01 AM, James Bottomley wrote:
> On Thu, 2012-05-17 at 08:55 +0000, Bart Van Assche wrote:
>> On 05/17/12 08:42, James Bottomley wrote:
>>
>>> On Wed, 2012-04-18 at 18:48 +0200, Tomas Henzl wrote:
>>>> On 04/12/2012 02:48 PM, Tomas Henzl wrote:
>>>>> Hi,
>>>>>
>>>>> the first patch waited in scsi_remove_host was until the scan thread ends, some flags were set
>>>>> so the thread could be aborted prematurely. 
>>>>>
>>>>> This patch uses a try_module_get to lock the module and prevents a rmmod while it's locked.
>>>>> The disadvantage is that in the protection we use again the same host template (shost->hostt->module;),
>>>>> and when this were not valid I think and oops in try_module_get will follow. It seems to me
>>>>> that the LLDs use scsi_scan_host only in the initial attach function so this is safe now, but ..
>>>> Actually I did some testing and it is not so stable as it should be - after 'modprobe mpt2sas && rmmod --wait mpt2sas'
>>>> I sometimes see:
>>>>
>>>> [  215.126197] mpt2sas version 12.100.00.00 loaded                                                                                                                                         
>>>> [  215.127509] scsi8 : Fusion MPT SAS Host                                                                                                                                                 
>>>> [  215.128058] mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (6096496 kB)                                                                                                   
>>>> [  215.128167] mpt2sas 0000:03:00.0: irq 69 for MSI/MSI-X
>>>> [  215.128190] mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 69
>>>> [  215.128192] mpt2sas0: iomem(0x00000000f7040000), mapped(0xffffc90006010000), size(16384)
>>>> [  215.128194] mpt2sas0: ioport(0x000000000000d000), size(256)
>>>> [  215.241249] mpt2sas0: Allocated physical memory: size(3988 kB)
>>>> [  215.241253] mpt2sas0: Current Controller Queue Depth(1753), Max Controller Queue Depth(2000)
>>>> [  215.241256] mpt2sas0: Scatter Gather Elements per IO(128)
>>>> [  215.300281] mpt2sas0: LSISAS2008: FWVersion(11.00.00.00), ChipRevision(0x01), BiosVersion(07.23.01.00)
>>>> [  215.300284] mpt2sas0: Protocol=(Initiator), Capabilities=(Raid,TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ)
>>>> [  215.300602] mpt2sas0: sending port enable !!
>>>> [  216.854720] mpt2sas0: host_add: handle(0x0001), sas_addr(0x500605b0006b8530), phys(8)
>>>> [  222.978469] mpt2sas0: port enable: SUCCESS
>>>> [  222.980104] scsi 8:0:0:0: Direct-Access     SEAGATE  ST9146852SS      0005 PQ: 0 ANSI: 5
>>>> [  222.980113] scsi 8:0:0:0: SSP: handle(0x0009), sas_addr(0x5000c5001ac10319), phy(2), device_name(0x5000c5001ac10318)
>>>> [  222.980117] scsi 8:0:0:0: SSP: enclosure_logical_id(0x500605b0006b8530), slot(2)
>>>> [  222.980121] scsi 8:0:0:0: qdepth(254), tagged(1), simple(0), ordered(0), scsi_level(6), cmd_que(1)
>>>> [  222.981149] mpt2sas version 12.100.00.00 unloading
>>>> [  222.981467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
>>>> [  222.981483] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>>>> [  222.981499] PGD 0 
>>>> [  222.981507] Oops: 0000 [#1] SMP 
>>>> [  222.981518] CPU 0 
>>>> [  222.981522] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio ip6t_REJECT cxgb4i cxgb4 cxgb3i
>>>> [  222.981571] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
>>>> [  222.981583]  libcxgbi cxgb3 nf_conntrack_ipv6 nf_defrag_ipv6 mdio nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6table_filter ip6_tables snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep i7core_edac edac_core snd_seq snd_seq_device hp_wmi iTCO_wdt iTCO_vendor_support snd_pcm snd_timer snd microcode soundcore snd_page_alloc sparse_keymap serio_raw tg3 rfkill uinput sunrpc firewire_ohci firewire_core crc_itu_t scsi_transport_sas raid_class nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
>>>> [  222.981790] 
>>>> [  222.981796] Pid: 1311, comm: scsi_scan_8 Not tainted 3.3.0 #21 Hewlett-Packard HP Z400 Workstation/0B4Ch
>>>> [  222.981815] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>>>> [  222.981829] RSP: 0018:ffff8801929a7ce0  EFLAGS: 00010246
>>>> [  222.981837] RAX: ffff880190c65810 RBX: ffff880190d5a438 RCX: 0000000000000000
>>>> [  222.981846] RDX: ffff880199034c60 RSI: ffff880192a81018 RDI: ffff880190d5a438
>>>> [  222.981854] RBP: ffff8801929a7d10 R08: ffff880199034c60 R09: ffff880195516d80
>>>> [  222.981863] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
>>>> [  222.981872] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880190d5a400
>>>> [  222.981881] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
>>>> [  222.981891] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>>>> [  222.981899] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
>>>> [  222.981913] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>>> [  222.981922] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>>>> [  222.981932] Process scsi_scan_8 (pid: 1311, threadinfo ffff8801929a6000, task ffff88018b6e8000)
>>>> [  222.981941] Stack:
>>>> [  222.981946]  ffff8801929a7d30 ffffffff810583a5 ffff8801929a7d10 ffff880190d5a438
>>>> [  222.981965] mpt2sas0: sending diag reset !!
>>>> [  222.983072]  ffff880190c65810 0000000000000000 ffff8801929a7d60 ffffffff812c2e6e
>>>> [  222.983644]  ffff8801929a7d66 0044b82fa09b5a53 ffff8801929a7dd0 ffff880190d5a438
>>>> [  222.984215] Call Trace:
>>>> [  222.984776]  [<ffffffff810583a5>] ? console_unlock+0x1f5/0x270
>>>> [  222.985334]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
>>>> [  222.985888]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
>>>> [  222.986461]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
>>>> [  222.987050]  [<ffffffff813c8288>] scsi_sysfs_add_sdev+0x198/0x340
>>>> [  222.987643]  [<ffffffff813c6b24>] do_scan_async+0x84/0x170
>>>> [  222.988241]  [<ffffffff813c6aa0>] ? do_scsi_scan_host+0xa0/0xa0
>>>> [  222.988845]  [<ffffffff81079c63>] kthread+0x93/0xa0
>>>> [  222.989443]  [<ffffffff815fd1e4>] kernel_thread_helper+0x4/0x10
>>>> [  222.990046]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
>>>> [  222.990655]  [<ffffffff815fd1e0>] ? gs_change+0x13/0x13
>>>> [  222.991256] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
>>>> [  222.992012] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
>>>> [  222.992626]  RSP <ffff8801929a7ce0>
>>>> [  222.993265] CR2: 0000000000000079
>>>> [  222.993943] ---[ end trace 024b7be18310b205 ]---
>>>> [  224.049467] mpt2sas0: diag reset: SUCCESS
>>>>
>>>> This means that the module protection with try_module_get doesn't work, the driver removal starts again
>>>> in the middle of a scan.
>>> Not necessarily; it means that the remove proceeded too far before it
>>> got to the sync point, which is the scan mutex acquisition in
>>> scsi_remove_host().  The fix is to make sure outstanding async work is
>>> completed before beginning the removal.  That can't really be done in
>>> SCSI, but it looks like this might be the trick.
>>>
>>> James
>>>
>>> ---
>>> diff --git a/drivers/base/driver.c b/drivers/base/driver.c
>>> index 3ec3896..246e652 100644
>>> --- a/drivers/base/driver.c
>>> +++ b/drivers/base/driver.c
>>> @@ -10,6 +10,7 @@
>>>   *
>>>   */
>>>  
>>> +#include <linux/async.h>
>>>  #include <linux/device.h>
>>>  #include <linux/module.h>
>>>  #include <linux/errno.h>
>>> @@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
>>>   */
>>>  void driver_unregister(struct device_driver *drv)
>>>  {
>>> +	/*
>>> +	 * make sure all async bits that may be running have completed the way
>>> +	 * this works is that try_module_get() now fails, so new async work
>>> +	 * should take a reference to the module which now fails, so it should
>>> +	 * be safe to remove the driver.
>>> +	 */
>>> +	async_synchronize_full();
>>>  	if (!drv || !drv->p) {
>>>  		WARN(1, "Unexpected driver unregister!\n");
>>>  		return;
Is this safe? I mean, is it possible that a new scan starts after the
async_synchronize_full() ends?
I wish this were fixed by fixing the real root of this problem which lies
in scssi_device_get:
int scsi_device_get(struct scsi_device *sdev)
{
        if (sdev->sdev_state == SDEV_DEL)
                return -ENXIO;
        if (!get_device(&sdev->sdev_gendev))
                return -ENXIO;
        /* We can fail this if we're doing SCSI operations
         * from module exit (like cache flush) */
        try_module_get(sdev->host->hostt->module);
here  ^ by ignoring the return value. This is why rmmod --wait causes the problem
and likely every other process which calls scsi_device_get would show the same bug.

>>
>> I'm hoping there exists another way to fix this. A patch like the above
>> will cause driver_unregister() to take forever if the rate at which new
>> devices are added is high enough. As far as I understand
>> async_synchronize_full() the time during which this function waits is
>> extended every time scanning of a newly added device starts.
> module removal isn't really supposed to be a common occurrence.  Rusty
> even argued it's impossible to fix the races, which is why there's a
> kernel option to disallow it.  I don't think inserting a checkpoint in
> the async domain for module removal is unreasonable.
>
> If it shows up as unreasonable in practise, we can label all the async
> work by THIS_MODULE and implement async_synchronize_module(THIS_MODULE)
> which would limit it, but I don't really think there'll be a huge
> problem.
>
> James
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-17 14:51           ` Tomas Henzl
@ 2012-05-22 10:05             ` James Bottomley
  2012-05-25 15:13               ` Tomas Henzl
  0 siblings, 1 reply; 38+ messages in thread
From: James Bottomley @ 2012-05-22 10:05 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: Bart Van Assche, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On Thu, 2012-05-17 at 16:51 +0200, Tomas Henzl wrote:
> >>> diff --git a/drivers/base/driver.c b/drivers/base/driver.c
> >>> index 3ec3896..246e652 100644
> >>> --- a/drivers/base/driver.c
> >>> +++ b/drivers/base/driver.c
> >>> @@ -10,6 +10,7 @@
> >>>   *
> >>>   */
> >>>  
> >>> +#include <linux/async.h>
> >>>  #include <linux/device.h>
> >>>  #include <linux/module.h>
> >>>  #include <linux/errno.h>
> >>> @@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
> >>>   */
> >>>  void driver_unregister(struct device_driver *drv)
> >>>  {
> >>> +	/*
> >>> +	 * make sure all async bits that may be running have completed the way
> >>> +	 * this works is that try_module_get() now fails, so new async work
> >>> +	 * should take a reference to the module which now fails, so it should
> >>> +	 * be safe to remove the driver.
> >>> +	 */
> >>> +	async_synchronize_full();
> >>>  	if (!drv || !drv->p) {
> >>>  		WARN(1, "Unexpected driver unregister!\n");
> >>>  		return;
> Is this safe? I mean, is it possible that a new scan starts after the
> async_synchronize_full() ends?

Not really, because the module is now removing, so the try module get
guard fails

> I wish this were fixed by fixing the real root of this problem which lies
> in scssi_device_get:
> int scsi_device_get(struct scsi_device *sdev)
> {
>         if (sdev->sdev_state == SDEV_DEL)
>                 return -ENXIO;
>         if (!get_device(&sdev->sdev_gendev))
>                 return -ENXIO;
>         /* We can fail this if we're doing SCSI operations
>          * from module exit (like cache flush) */
>         try_module_get(sdev->host->hostt->module);
> here  ^ by ignoring the return value. This is why rmmod --wait causes the problem
> and likely every other process which calls scsi_device_get would show the same bug.

I'm not sure how ... this has to be usable in the teardown path by which
point try_module_get will fail (as the comment says).  What do you
propose?

James




^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-22 10:05             ` James Bottomley
@ 2012-05-25 15:13               ` Tomas Henzl
  2012-05-25 18:46                 ` Dan Williams
  0 siblings, 1 reply; 38+ messages in thread
From: Tomas Henzl @ 2012-05-25 15:13 UTC (permalink / raw)
  To: James Bottomley
  Cc: Bart Van Assche, 'linux-scsi@vger.kernel.org',
	Stanislaw Gruszka, Mike Christie, stefanr

On 05/22/2012 12:05 PM, James Bottomley wrote:
> On Thu, 2012-05-17 at 16:51 +0200, Tomas Henzl wrote:
>>>>> diff --git a/drivers/base/driver.c b/drivers/base/driver.c
>>>>> index 3ec3896..246e652 100644
>>>>> --- a/drivers/base/driver.c
>>>>> +++ b/drivers/base/driver.c
>>>>> @@ -10,6 +10,7 @@
>>>>>   *
>>>>>   */
>>>>>  
>>>>> +#include <linux/async.h>
>>>>>  #include <linux/device.h>
>>>>>  #include <linux/module.h>
>>>>>  #include <linux/errno.h>
>>>>> @@ -199,6 +200,13 @@ EXPORT_SYMBOL_GPL(driver_register);
>>>>>   */
>>>>>  void driver_unregister(struct device_driver *drv)
>>>>>  {
>>>>> +	/*
>>>>> +	 * make sure all async bits that may be running have completed the way
>>>>> +	 * this works is that try_module_get() now fails, so new async work
>>>>> +	 * should take a reference to the module which now fails, so it should
>>>>> +	 * be safe to remove the driver.
>>>>> +	 */
>>>>> +	async_synchronize_full();
>>>>>  	if (!drv || !drv->p) {
>>>>>  		WARN(1, "Unexpected driver unregister!\n");
>>>>>  		return;
>> Is this safe? I mean, is it possible that a new scan starts after the
>> async_synchronize_full() ends?
> Not really, because the module is now removing, so the try module get
> guard fails
The async_synchronize_full : "This function returns when there are no asynchronous
function calls in the system."
The point is that the async scan is not started with async_schedule but with kthread_run,
so the synchronization doesn't work in this case (an error log is attached at bottom).
>
>> I wish this were fixed by fixing the real root of this problem which lies
>> in scssi_device_get:
>> int scsi_device_get(struct scsi_device *sdev)
>> {
>>         if (sdev->sdev_state == SDEV_DEL)
>>                 return -ENXIO;
>>         if (!get_device(&sdev->sdev_gendev))
>>                 return -ENXIO;
>>         /* We can fail this if we're doing SCSI operations
>>          * from module exit (like cache flush) */
>>         try_module_get(sdev->host->hostt->module);
>> here  ^ by ignoring the return value. This is why rmmod --wait causes the problem
>> and likely every other process which calls scsi_device_get would show the same bug.
> I'm not sure how ... this has to be usable in the teardown path by which
> point try_module_get will fail (as the comment says).  What do you
> propose?
I wish I would knew, but everything seems to be too much complicated, one idea
was to use in the teardown path new special functions only for this use, but...

Tomas

>
> James
>
>

May 25 04:25:54 localhost kernel: [  461.525209] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
May 25 04:25:54 localhost kernel: [  461.525242] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
May 25 04:25:54 localhost kernel: [  461.525259] PGD 0 
May 25 04:25:54 localhost kernel: [  461.525267] Oops: 0000 [#1] SMP 
May 25 04:25:54 localhost kernel: [  461.525278] CPU 0 
May 25 04:25:54 localhost kernel: [  461.525283] Modules linked in: mpt2sas(-) lockd bnep bluetooth be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser
May 25 04:25:54 localhost kernel: [  461.525342] mpt2sas0: removing handle(0x0009), sas_addr(0x5000c5001ac10319)
May 25 04:25:54 localhost kernel: [  461.525355]  rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ip6table_filter ip6_tables i7core_edac edac_core hp_wmi iTCO_wdt snd_hda_codec_realtek tg3 iTCO_vendor_support snd_hda_intel snd_hda_codec snd_hwdep scsi_transport_sas raid_class snd_seq snd_seq_device snd_pcm snd_timer sparse_keymap rfkill serio_raw snd soundcore snd_page_alloc microcode sunrpc uinput firewire_ohci firewire_core crc_itu_t nouveau ttm drm_kms_helper drm i2c_core mxm_wmi video wmi [last unloaded: mpt2sas]
May 25 04:25:54 localhost kernel: [  461.525640] 
May 25 04:25:54 localhost kernel: [  461.525651] Pid: 1341, comm: scsi_scan_12 Not tainted 3.3.0 #4 Hewlett-Packard HP Z400 Workstation/0B4Ch
May 25 04:25:54 localhost kernel: [  461.525669] RIP: 0010:[<ffffffff811f0c45>]  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
May 25 04:25:54 localhost kernel: [  461.525685] RSP: 0018:ffff88019419bce0  EFLAGS: 00010246
May 25 04:25:54 localhost kernel: [  461.525693] RAX: ffff880192ad2010 RBX: ffff880192ad5438 RCX: 0000000000000000
May 25 04:25:54 localhost kernel: [  461.525702] RDX: ffff880199034c60 RSI: ffff880192ad0018 RDI: ffff880192ad5438
May 25 04:25:54 localhost kernel: [  461.525711] RBP: ffff88019419bd10 R08: ffff880199034c60 R09: ffff880187c81980
May 25 04:25:54 localhost kernel: [  461.525720] R10: 0000000000000400 R11: 0000000000010140 R12: 0000000000000000
May 25 04:25:54 localhost kernel: [  461.525728] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880192ad5400
May 25 04:25:54 localhost kernel: [  461.525738] FS:  0000000000000000(0000) GS:ffff88019fc00000(0000) knlGS:0000000000000000
May 25 04:25:54 localhost kernel: [  461.525748] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
May 25 04:25:54 localhost kernel: [  461.525756] CR2: 0000000000000079 CR3: 0000000001c05000 CR4: 00000000000006f0
May 25 04:25:54 localhost kernel: [  461.525765] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
May 25 04:25:54 localhost kernel: [  461.525774] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
May 25 04:25:54 localhost kernel: [  461.525788] Process scsi_scan_12 (pid: 1341, threadinfo ffff88019419a000, task ffff880187ca9730)
May 25 04:25:54 localhost kernel: [  461.525796] mpt2sas0: sending diag reset !!
May 25 04:25:54 localhost kernel: [  461.525805] Stack:
May 25 04:25:54 localhost kernel: [  461.525810]  ffff88019419bd30 ffffffff81088c0e ffff88019419bd50 ffff880192ad5438
May 25 04:25:54 localhost kernel: [  461.525829]  ffff880192ad2010 0000000000000000 ffff88019419bd60 ffffffff812c2e6e
May 25 04:25:54 localhost kernel: [  461.526446]  0000000100027998 ffffffff81e487c0 ffff88019419bd40 ffff880192ad5438
May 25 04:25:54 localhost kernel: [  461.526992] Call Trace:
May 25 04:25:54 localhost kernel: [  461.527534]  [<ffffffff81088c0e>] ? try_to_wake_up+0x1be/0x2b0
May 25 04:25:54 localhost kernel: [  461.528075]  [<ffffffff812c2e6e>] kobject_add_internal+0xae/0x250
May 25 04:25:54 localhost kernel: [  461.528612]  [<ffffffff812c3417>] kobject_add+0x67/0xc0
May 25 04:25:54 localhost kernel: [  461.529150]  [<ffffffff8139eb32>] device_add+0x102/0x6c0
May 25 04:25:54 localhost kernel: [  461.529714]  [<ffffffff815f2a6b>] ? wait_for_common+0x3b/0x170
May 25 04:25:54 localhost kernel: [  461.530300]  [<ffffffff813c81e8>] scsi_sysfs_add_sdev+0x198/0x340
May 25 04:25:54 localhost kernel: [  461.530886]  [<ffffffff813c6aa4>] do_scan_async+0x84/0x160
May 25 04:25:54 localhost kernel: [  461.531473]  [<ffffffff813c6a20>] ? do_scsi_scan_host+0xa0/0xa0
May 25 04:25:54 localhost kernel: [  461.532063]  [<ffffffff81079c63>] kthread+0x93/0xa0
May 25 04:25:54 localhost kernel: [  461.532647]  [<ffffffff815fd124>] kernel_thread_helper+0x4/0x10
May 25 04:25:54 localhost kernel: [  461.533231]  [<ffffffff81079bd0>] ? kthread_freezable_should_stop+0x70/0x70
May 25 04:25:54 localhost kernel: [  461.533822]  [<ffffffff815fd120>] ? gs_change+0x13/0x13
May 25 04:25:54 localhost kernel: [  461.534409] Code: 83 ec 18 66 66 66 66 90 48 85 ff 48 89 fb 0f 84 98 00 00 00 48 8b 47 18 49 c7 c4 20 0e c5 81 48 85 c0 74 04 4c 8b 60 30 45 31 ed <41> 80 7c 24 79 00 75 5b 48 89 df e8 8b 2c 0d 00 48 85 c0 74 66 
May 25 04:25:54 localhost kernel: [  461.535150] RIP  [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
May 25 04:25:54 localhost kernel: [  461.535747]  RSP <ffff88019419bce0>
May 25 04:25:54 localhost kernel: [  461.536366] CR2: 0000000000000079
May 25 04:25:54 localhost kernel: [  461.537043] ---[ end trace 83bf7cc296e3a5d6 ]---
May 25 04:25:55 localhost kernel: [  462.585300] mpt2sas0: diag reset: SUCCESS



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-25 15:13               ` Tomas Henzl
@ 2012-05-25 18:46                 ` Dan Williams
  2012-05-28 11:58                   ` Tomas Henzl
  0 siblings, 1 reply; 38+ messages in thread
From: Dan Williams @ 2012-05-25 18:46 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: James Bottomley, Bart Van Assche, linux-scsi, Stanislaw Gruszka,
	Mike Christie, stefanr

On Fri, May 25, 2012 at 8:13 AM, Tomas Henzl <thenzl@redhat.com> wrote:
> May 25 04:25:54 localhost kernel: [  461.525209] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
> May 25 04:25:54 localhost kernel: [  461.525242] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0

Have a look at:

http://marc.info/?l=linux-scsi&m=133793175125892&w=2

...it addresses tearing down targets before they are added which
appears to be the signature here.

--
Dan
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC] How to fix an async scan - 'rmmod --wait' race?
  2012-05-25 18:46                 ` Dan Williams
@ 2012-05-28 11:58                   ` Tomas Henzl
  0 siblings, 0 replies; 38+ messages in thread
From: Tomas Henzl @ 2012-05-28 11:58 UTC (permalink / raw)
  To: Dan Williams
  Cc: James Bottomley, Bart Van Assche, linux-scsi, Stanislaw Gruszka,
	Mike Christie, stefanr

On 05/25/2012 08:46 PM, Dan Williams wrote:
> On Fri, May 25, 2012 at 8:13 AM, Tomas Henzl <thenzl@redhat.com> wrote:
>> May 25 04:25:54 localhost kernel: [  461.525209] BUG: unable to handle kernel NULL pointer dereference at 0000000000000079
>> May 25 04:25:54 localhost kernel: [  461.525242] IP: [<ffffffff811f0c45>] sysfs_create_dir+0x35/0xc0
> Have a look at:
>
> http://marc.info/?l=linux-scsi&m=133793175125892&w=2
>
> ...it addresses tearing down targets before they are added which
> appears to be the signature here.

Yes it helps, I can not reproduce this^ BUG any longer.

The problem I'm trying to fix can be seen when the lld is removed while
the scsi async scan is running. The reason is, that the pointers to the lld stored
in scsi_host_template are accesed even after the lld is removed.
Several possibilities how to fix this have evolved during the time the two main options
are -1) protect the scan process with try_modele_get/put, there is a problem
with 'rmmod --wait'. This seems now, with your patch, to be removed (the try_module-get-put
fix is still needed). I don't believe this is 100% safe because of the problems
in scsi_device_get. 
2) the patch posted here http://marc.info/?l=linux-scsi&m=133527313522298&w=2
it cancels the async scan, when the driver calls scsi_remove_host.(it's faster :)
I hope it is safe -I haven't seen any problems, tested without your patch.
I think your use case differs, so this patch won't fix problems you see.

--------------

>From my point of view your patch is needed and one of the two patches referenced above is needed
too, I don't care which one will be used.


Tomas


>
> --
> Dan
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2012-05-28 11:58 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-05 13:58 [RFC] How to fix an async scan - rmmod race? Tomas Henzl
2012-04-05 15:57 ` Mike Christie
2012-04-05 16:05   ` Mike Christie
2012-04-05 18:00 ` Bart Van Assche
2012-04-05 21:29   ` Mike Christie
2012-04-06  9:24     ` Bart Van Assche
2012-04-06 17:22       ` Mike Christie
2012-04-06 18:37         ` Bart Van Assche
2012-04-11 21:46         ` Mike Christie
2012-04-06  9:54     ` Tomas Henzl
2012-04-06 15:20       ` James Bottomley
2012-04-06 16:15         ` Bart Van Assche
2012-04-06 16:35           ` James Bottomley
2012-04-06 17:01             ` Bart Van Assche
2012-04-06 17:15               ` James Bottomley
2012-04-06 17:59                 ` Bart Van Assche
2012-04-08 17:38                 ` Bart Van Assche
2012-04-11 18:17                   ` Mike Christie
2012-04-11 18:30                     ` Mike Christie
2012-04-11 19:47                     ` Bart Van Assche
2012-04-11 22:28                       ` Mike Christie
2012-04-12 10:48                         ` Bart Van Assche
2012-04-06  9:39 ` Bart Van Assche
2012-04-06 10:14   ` Tomas Henzl
2012-04-06 13:13     ` Tomas Henzl
2012-04-06 14:38       ` Bart Van Assche
2012-04-06 15:32         ` Tomas Henzl
2012-04-12 12:48 ` [RFC] How to fix an async scan - rmmod race? try_module_get Tomas Henzl
2012-04-18 16:48   ` [RFC] How to fix an async scan - 'rmmod --wait' race? Tomas Henzl
2012-04-18 18:18     ` Bart Van Assche
2012-05-17  8:42     ` James Bottomley
2012-05-17  8:55       ` Bart Van Assche
2012-05-17  9:01         ` James Bottomley
2012-05-17 14:51           ` Tomas Henzl
2012-05-22 10:05             ` James Bottomley
2012-05-25 15:13               ` Tomas Henzl
2012-05-25 18:46                 ` Dan Williams
2012-05-28 11:58                   ` Tomas Henzl

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.