All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Separate target visibility from reaped state information
@ 2016-01-08 16:51 Bart Van Assche
  2016-01-18  8:55 ` Johannes Thumshirn
  2016-01-20  0:30 ` Martin K. Petersen
  0 siblings, 2 replies; 24+ messages in thread
From: Bart Van Assche @ 2016-01-08 16:51 UTC (permalink / raw)
  To: James Bottomley
  Cc: Martin K. Petersen, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, Sebastian Herbszt, linux-scsi

Instead of representing the states "visible in sysfs" and
"has been removed from the target list" by a single state
variable, use two variables to represent this information.

This patch prevents SCSI device removal from triggering the
following soft lockup:

NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:1:29]
CPU: 1 PID: 29 Comm: kworker/1:1 Tainted: G           O    4.4.0-rc5-2.g1e923a3-default #1
Workqueue: fc_wq_4 fc_rport_final_delete [scsi_transport_fc]
Call Trace:
 [<c066b0f7>] scsi_remove_target+0x167/0x1c0
 [<f8f0a4ed>] fc_rport_final_delete+0x9d/0x1e0 [scsi_transport_fc]
 [<c026cb25>] process_one_work+0x155/0x3e0
 [<c026cde7>] worker_thread+0x37/0x490
 [<c027214b>] kthread+0x9b/0xb0
 [<c07e72c1>] ret_from_kernel_thread+0x21/0x40

See also commit bc3f02a795d3 ("scsi_remove_target: fix softlockup
regression on hot remove").

Reported-by: Sebastian Herbszt <herbszt@gmx.de>
Tested-by: Sebastian Herbszt <herbszt@gmx.de>
Fixes: 40998193560d ("scsi: restart list search after unlock in scsi_remove_target")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: stable <stable@vger.kernel.org>
---
 drivers/scsi/scsi_scan.c   | 31 +++----------------------------
 drivers/scsi/scsi_sysfs.c  |  7 ++++---
 include/scsi/scsi_device.h |  9 ++-------
 3 files changed, 9 insertions(+), 38 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 054923e..c455a88 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -314,7 +314,6 @@ static void scsi_target_destroy(struct scsi_target *starget)
 	struct Scsi_Host *shost = dev_to_shost(dev->parent);
 	unsigned long flags;
 
-	starget->state = STARGET_DEL;
 	transport_destroy_device(dev);
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->hostt->target_destroy)
@@ -379,19 +378,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
 	struct scsi_target *starget
 		= container_of(kref, struct scsi_target, reap_ref);
 
-	/*
-	 * if we get here and the target is still in the CREATED state that
-	 * means it was allocated but never made visible (because a scan
-	 * turned up no LUNs), so don't call device_del() on it.
-	 */
-	if (starget->state != STARGET_CREATED) {
+	if (starget->is_visible) {
+		starget->is_visible = false;
 		transport_remove_device(&starget->dev);
 		device_del(&starget->dev);
 	}
 	scsi_target_destroy(starget);
 }
 
-static void scsi_target_reap_ref_put(struct scsi_target *starget)
+void scsi_target_reap(struct scsi_target *starget)
 {
 	kref_put(&starget->reap_ref, scsi_target_reap_ref_release);
 }
@@ -437,7 +432,6 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 	starget->can_queue = 0;
 	INIT_LIST_HEAD(&starget->siblings);
 	INIT_LIST_HEAD(&starget->devices);
-	starget->state = STARGET_CREATED;
 	starget->scsi_level = SCSI_2;
 	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
  retry:
@@ -498,25 +492,6 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 }
 
 /**
- * scsi_target_reap - check to see if target is in use and destroy if not
- * @starget: target to be checked
- *
- * This is used after removing a LUN or doing a last put of the target
- * it checks atomically that nothing is using the target and removes
- * it if so.
- */
-void scsi_target_reap(struct scsi_target *starget)
-{
-	/*
-	 * serious problem if this triggers: STARGET_DEL is only set in the if
-	 * the reap_ref drops to zero, so we're trying to do another final put
-	 * on an already released kref
-	 */
-	BUG_ON(starget->state == STARGET_DEL);
-	scsi_target_reap_ref_put(starget);
-}
-
-/**
  * sanitize_inquiry_string - remove non-graphical chars from an INQUIRY result string
  * @s: INQUIRY result string to sanitize
  * @len: length of the string
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 21930c9..532c062 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1000,7 +1000,7 @@ static int scsi_target_add(struct scsi_target *starget)
 {
 	int error;
 
-	if (starget->state != STARGET_CREATED)
+	if (starget->is_visible)
 		return 0;
 
 	error = device_add(&starget->dev);
@@ -1009,7 +1009,7 @@ static int scsi_target_add(struct scsi_target *starget)
 		return error;
 	}
 	transport_add_device(&starget->dev);
-	starget->state = STARGET_RUNNING;
+	starget->is_visible = true;
 
 	pm_runtime_set_active(&starget->dev);
 	pm_runtime_enable(&starget->dev);
@@ -1198,10 +1198,11 @@ void scsi_remove_target(struct device *dev)
 restart:
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_for_each_entry(starget, &shost->__targets, siblings) {
-		if (starget->state == STARGET_DEL)
+		if (starget->reaped)
 			continue;
 		if (starget->dev.parent == dev || &starget->dev == dev) {
 			kref_get(&starget->reap_ref);
+			starget->reaped = true;
 			spin_unlock_irqrestore(shost->host_lock, flags);
 			__scsi_remove_target(starget);
 			scsi_target_reap(starget);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index fe89d7c..f11c794 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -236,12 +236,6 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);
 			sdev_dbg((scmd)->device, fmt, ##a);		   \
 	} while (0)
 
-enum scsi_target_state {
-	STARGET_CREATED = 1,
-	STARGET_RUNNING,
-	STARGET_DEL,
-};
-
 /*
  * scsi_target: representation of a scsi target, for now, this is only
  * used for single_lun devices. If no one has active IO to the target,
@@ -267,6 +261,8 @@ struct scsi_target {
 	unsigned int		expecting_lun_change:1;	/* A device has reported
 						 * a 3F/0E UA, other devices on
 						 * the same target will also. */
+	unsigned int		is_visible:1; /* visible in sysfs */
+	unsigned int		reaped:1; /* removed from target list */
 	/* commands actually active on LLD. */
 	atomic_t		target_busy;
 	atomic_t		target_blocked;
@@ -280,7 +276,6 @@ struct scsi_target {
 #define SCSI_DEFAULT_TARGET_BLOCKED	3
 
 	char			scsi_level;
-	enum scsi_target_state	state;
 	void 			*hostdata; /* available to low-level driver */
 	unsigned long		starget_data[0]; /* for the transport */
 	/* starget_data must be the last element!!!! */
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-08 16:51 [PATCH] Separate target visibility from reaped state information Bart Van Assche
@ 2016-01-18  8:55 ` Johannes Thumshirn
  2016-01-20  0:30 ` Martin K. Petersen
  1 sibling, 0 replies; 24+ messages in thread
From: Johannes Thumshirn @ 2016-01-18  8:55 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, Martin K. Petersen, Christoph Hellwig,
	Dan Williams, Sebastian Herbszt, linux-scsi

On Fri, Jan 08, 2016 at 05:51:46PM +0100, Bart Van Assche wrote:
> Instead of representing the states "visible in sysfs" and
> "has been removed from the target list" by a single state
> variable, use two variables to represent this information.
> 
> This patch avoids that SCSI device removal can trigger the
> following soft lockup:
> 
> NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:1:29]
> CPU: 1 PID: 29 Comm: kworker/1:1 Tainted: G           O    4.4.0-rc5-2.g1e923a3-default #1
> Workqueue: fc_wq_4 fc_rport_final_delete [scsi_transport_fc]
> Call Trace:
>  [<c066b0f7>] scsi_remove_target+0x167/0x1c0
>  [<f8f0a4ed>] fc_rport_final_delete+0x9d/0x1e0 [scsi_transport_fc]
>  [<c026cb25>] process_one_work+0x155/0x3e0
>  [<c026cde7>] worker_thread+0x37/0x490
>  [<c027214b>] kthread+0x9b/0xb0
>  [<c07e72c1>] ret_from_kernel_thread+0x21/0x40
> 
> See also commit bc3f02a795d3 ("scsi_remove_target: fix softlockup
> regression on hot remove").
> 
> Reported-by: Sebastian Herbszt <herbszt@gmx.de>
> Tested-by: Sebastian Herbszt <herbszt@gmx.de>
> Fixes: commit 40998193560d ("scsi: restart list search after unlock in scsi_remove_target")
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Johannes Thumshirn <jthumshirn@suse.de>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: stable <stable@vger.kernel.org>
> ---
>  drivers/scsi/scsi_scan.c   | 31 +++----------------------------
>  drivers/scsi/scsi_sysfs.c  |  7 ++++---
>  include/scsi/scsi_device.h |  9 ++-------
>  3 files changed, 9 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 054923e..c455a88 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -314,7 +314,6 @@ static void scsi_target_destroy(struct scsi_target *starget)
>  	struct Scsi_Host *shost = dev_to_shost(dev->parent);
>  	unsigned long flags;
>  
> -	starget->state = STARGET_DEL;
>  	transport_destroy_device(dev);
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	if (shost->hostt->target_destroy)
> @@ -379,19 +378,15 @@ static void scsi_target_reap_ref_release(struct kref *kref)
>  	struct scsi_target *starget
>  		= container_of(kref, struct scsi_target, reap_ref);
>  
> -	/*
> -	 * if we get here and the target is still in the CREATED state that
> -	 * means it was allocated but never made visible (because a scan
> -	 * turned up no LUNs), so don't call device_del() on it.
> -	 */
> -	if (starget->state != STARGET_CREATED) {
> +	if (starget->is_visible) {
> +		starget->is_visible = false;
>  		transport_remove_device(&starget->dev);
>  		device_del(&starget->dev);
>  	}
>  	scsi_target_destroy(starget);
>  }
>  
> -static void scsi_target_reap_ref_put(struct scsi_target *starget)
> +void scsi_target_reap(struct scsi_target *starget)
>  {
>  	kref_put(&starget->reap_ref, scsi_target_reap_ref_release);
>  }
> @@ -437,7 +432,6 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  	starget->can_queue = 0;
>  	INIT_LIST_HEAD(&starget->siblings);
>  	INIT_LIST_HEAD(&starget->devices);
> -	starget->state = STARGET_CREATED;
>  	starget->scsi_level = SCSI_2;
>  	starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED;
>   retry:
> @@ -498,25 +492,6 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
>  }
>  
>  /**
> - * scsi_target_reap - check to see if target is in use and destroy if not
> - * @starget: target to be checked
> - *
> - * This is used after removing a LUN or doing a last put of the target
> - * it checks atomically that nothing is using the target and removes
> - * it if so.
> - */
> -void scsi_target_reap(struct scsi_target *starget)
> -{
> -	/*
> -	 * serious problem if this triggers: STARGET_DEL is only set in the if
> -	 * the reap_ref drops to zero, so we're trying to do another final put
> -	 * on an already released kref
> -	 */
> -	BUG_ON(starget->state == STARGET_DEL);
> -	scsi_target_reap_ref_put(starget);
> -}
> -
> -/**
>   * sanitize_inquiry_string - remove non-graphical chars from an INQUIRY result string
>   * @s: INQUIRY result string to sanitize
>   * @len: length of the string
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 21930c9..532c062 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1000,7 +1000,7 @@ static int scsi_target_add(struct scsi_target *starget)
>  {
>  	int error;
>  
> -	if (starget->state != STARGET_CREATED)
> +	if (starget->is_visible)
>  		return 0;
>  
>  	error = device_add(&starget->dev);
> @@ -1009,7 +1009,7 @@ static int scsi_target_add(struct scsi_target *starget)
>  		return error;
>  	}
>  	transport_add_device(&starget->dev);
> -	starget->state = STARGET_RUNNING;
> +	starget->is_visible = true;
>  
>  	pm_runtime_set_active(&starget->dev);
>  	pm_runtime_enable(&starget->dev);
> @@ -1198,10 +1198,11 @@ void scsi_remove_target(struct device *dev)
>  restart:
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	list_for_each_entry(starget, &shost->__targets, siblings) {
> -		if (starget->state == STARGET_DEL)
> +		if (starget->reaped)
>  			continue;
>  		if (starget->dev.parent == dev || &starget->dev == dev) {
>  			kref_get(&starget->reap_ref);
> +			starget->reaped = true;
>  			spin_unlock_irqrestore(shost->host_lock, flags);
>  			__scsi_remove_target(starget);
>  			scsi_target_reap(starget);
> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> index fe89d7c..f11c794 100644
> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -236,12 +236,6 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);
>  			sdev_dbg((scmd)->device, fmt, ##a);		   \
>  	} while (0)
>  
> -enum scsi_target_state {
> -	STARGET_CREATED = 1,
> -	STARGET_RUNNING,
> -	STARGET_DEL,
> -};
> -
>  /*
>   * scsi_target: representation of a scsi target, for now, this is only
>   * used for single_lun devices. If no one has active IO to the target,
> @@ -267,6 +261,8 @@ struct scsi_target {
>  	unsigned int		expecting_lun_change:1;	/* A device has reported
>  						 * a 3F/0E UA, other devices on
>  						 * the same target will also. */
> +	unsigned int		is_visible:1; /* visible in sysfs */
> +	unsigned int		reaped:1; /* removed from target list */
>  	/* commands actually active on LLD. */
>  	atomic_t		target_busy;
>  	atomic_t		target_blocked;
> @@ -280,7 +276,6 @@ struct scsi_target {
>  #define SCSI_DEFAULT_TARGET_BLOCKED	3
>  
>  	char			scsi_level;
> -	enum scsi_target_state	state;
>  	void 			*hostdata; /* available to low-level driver */
>  	unsigned long		starget_data[0]; /* for the transport */
>  	/* starget_data must be the last element!!!! */
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Looks fine to me.
Thanks

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-08 16:51 [PATCH] Separate target visibility from reaped state information Bart Van Assche
  2016-01-18  8:55 ` Johannes Thumshirn
@ 2016-01-20  0:30 ` Martin K. Petersen
  2016-01-20  1:03   ` James Bottomley
  1 sibling, 1 reply; 24+ messages in thread
From: Martin K. Petersen @ 2016-01-20  0:30 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, Martin K. Petersen, Christoph Hellwig,
	Johannes Thumshirn, Dan Williams, Sebastian Herbszt, linux-scsi

>>>>> "Bart" == Bart Van Assche <bart.vanassche@sandisk.com> writes:

Bart> Instead of representing the states "visible in sysfs" and "has
Bart> been removed from the target list" by a single state variable, use
Bart> two variables to represent this information.

James: Are you happy with the latest iteration of this? Should I queue
it?

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-20  0:30 ` Martin K. Petersen
@ 2016-01-20  1:03   ` James Bottomley
  2016-01-31 17:54     ` Sebastian Herbszt
  2016-02-02  3:43     ` Bart Van Assche
  0 siblings, 2 replies; 24+ messages in thread
From: James Bottomley @ 2016-01-20  1:03 UTC (permalink / raw)
  To: Martin K. Petersen, Bart Van Assche
  Cc: Christoph Hellwig, Johannes Thumshirn, Dan Williams,
	Sebastian Herbszt, linux-scsi

On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
> > > > > > writes:
> 
> Bart> Instead of representing the states "visible in sysfs" and "has
> Bart> been removed from the target list" by a single state variable,
> use
> Bart> two variables to represent this information.
> 
> James: Are you happy with the latest iteration of this? Should I
> queue
> it?

Well, I'm OK with the patch: it's a simple transformation of the
enumerated state to a two bit state.  What I can't see is how it fixes
any soft lockup.

The only change from the current workflow is that the DEL transition
(now the reaped flag) is done before the spin lock is dropped which
would fix a tiny window for two threads both trying to remove the same
target, but there's nothing that could possibly fix an iterative soft
lockup caused by restarting the loop, which is what the changelog says.

James


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-20  1:03   ` James Bottomley
@ 2016-01-31 17:54     ` Sebastian Herbszt
  2016-02-02  1:11       ` Martin K. Petersen
  2016-02-02  3:43     ` Bart Van Assche
  1 sibling, 1 reply; 24+ messages in thread
From: Sebastian Herbszt @ 2016-01-31 17:54 UTC (permalink / raw)
  To: James Bottomley, Martin K. Petersen
  Cc: Bart Van Assche, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, linux-scsi, Sebastian Herbszt

James Bottomley wrote:
> On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
> > > > > > > writes:
> > 
> > Bart> Instead of representing the states "visible in sysfs" and "has
> > Bart> been removed from the target list" by a single state variable,
> > use
> > Bart> two variables to represent this information.
> > 
> > James: Are you happy with the latest iteration of this? Should I
> > queue
> > it?
> 
> Well, I'm OK with the patch: it's a simple transformation of the
> enumerated state to a two bit state.  What I can't see is how it fixes
> any soft lockup.
> 
> The only change from the current workflow is that the DEL transition
> (now the reaped flag) is done before the spin lock is dropped which
> would fix a tiny window for two threads both trying to remove the same
> target, but there's nothing that could possibly fix an iterative soft
> lockup caused by restarting the loop, which is what the changelog says.
> 
> James

James, Martin, what's the status of this patch?
I still hit the reported soft lockup on 4.5-rc1.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-31 17:54     ` Sebastian Herbszt
@ 2016-02-02  1:11       ` Martin K. Petersen
  2016-02-02  9:03         ` Johannes Thumshirn
                           ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Martin K. Petersen @ 2016-02-02  1:11 UTC (permalink / raw)
  To: Sebastian Herbszt
  Cc: James Bottomley, Martin K. Petersen, Bart Van Assche,
	Christoph Hellwig, Johannes Thumshirn, Dan Williams, linux-scsi

>>>>> "Sebastian" == Sebastian Herbszt <herbszt@gmx.de> writes:

>> The only change from the current workflow is that the DEL transition
>> (now the reaped flag) is done before the spin lock is dropped which
>> would fix a tiny window for two threads both trying to remove the
>> same target, but there's nothing that could possibly fix an iterative
>> soft lockup caused by restarting the loop, which is what the
>> changelog says.

Sebastian> James, Martin, what's the status of this patch?  I still hit
Sebastian> the reported soft lockup on 4.5-rc1.

And you have verified that Bart's patch applied on top of 4.5-rc1 still
fixes the lockup? (I know you tested a previous version)

I am concerned about queuing something as a stable fix if it is just
masking a fundamental underlying problem. James' comment suggests that
there is something else going on.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-01-20  1:03   ` James Bottomley
  2016-01-31 17:54     ` Sebastian Herbszt
@ 2016-02-02  3:43     ` Bart Van Assche
  2016-02-02 11:46       ` James Bottomley
  1 sibling, 1 reply; 24+ messages in thread
From: Bart Van Assche @ 2016-02-02  3:43 UTC (permalink / raw)
  To: James Bottomley, Martin K. Petersen
  Cc: Christoph Hellwig, Johannes Thumshirn, Dan Williams,
	Sebastian Herbszt, linux-scsi

On 01/19/16 17:03, James Bottomley wrote:
> On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
>>>>>>> "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
>>>>>>> writes:
>>
>> Bart> Instead of representing the states "visible in sysfs" and "has
>> Bart> been removed from the target list" by a single state variable,
>> use
>> Bart> two variables to represent this information.
>>
>> James: Are you happy with the latest iteration of this? Should I
>> queue
>> it?
>
> Well, I'm OK with the patch: it's a simple transformation of the
> enumerated state to a two bit state.  What I can't see is how it fixes
> any soft lockup.
>
> The only change from the current workflow is that the DEL transition
> (now the reaped flag) is done before the spin lock is dropped which
> would fix a tiny window for two threads both trying to remove the same
> target, but there's nothing that could possibly fix an iterative soft
> lockup caused by restarting the loop, which is what the changelog says.

Hello James,

scsi_remove_target() doesn't lock the scan_mutex, which means that
concurrent SCSI scanning activity is not prohibited. Such scanning
activity can postpone the transition of a SCSI target's state to
STARGET_DEL. I think the soft lockup reported by Sebastian Herbszt can
be triggered if the scheduler decides to run the thread that executes
scsi_remove_target() on the same CPU as the scanning code, after the
scanning code has obtained a reap ref and before it has released that
reap ref again.

Bart.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02  1:11       ` Martin K. Petersen
@ 2016-02-02  9:03         ` Johannes Thumshirn
  2016-02-03 17:17         ` Christoph Hellwig
                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Johannes Thumshirn @ 2016-02-02  9:03 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Sebastian Herbszt, James Bottomley, Bart Van Assche,
	Christoph Hellwig, Dan Williams, linux-scsi, Dick Kennedy

On Mon, Feb 01, 2016 at 08:11:29PM -0500, Martin K. Petersen wrote:
> >>>>> "Sebastian" == Sebastian Herbszt <herbszt@gmx.de> writes:
> 
> >> The only change from the current workflow is that the DEL transition
> >> (now the reaped flag) is done before the spin lock is dropped which
> >> would fix a tiny window for two threads both trying to remove the
> >> same target, but there's nothing that could possibly fix an iterative
> >> soft lockup caused by restarting the loop, which is what the
> >> changelog says.
> 
> Sebastian> James, Martin, what's the status of this patch?  I still hit
> Sebastian> the reported soft lockup on 4.5-rc1.
> 
> And you have verified that Bart's patch applied on top of 4.5-rc1 still
> fixes the lockup? (I know you tested a previous version)
> 

I had an off-list discussion/problem report from Dick Kennedy, pointed him
to this very patch, and he verified that it solved his problem (a lockup
like the one reported by Sebastian).

I'm not sure if he tested v4.4.X or v4.5-rcX though.

> I am concerned about queuing something as a stable fix if it is just
> masking a fundamental underlying problem. James' comment suggests that
> there is something else going on.
> 
> -- 
> Martin K. Petersen	Oracle Linux Engineering


-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02  3:43     ` Bart Van Assche
@ 2016-02-02 11:46       ` James Bottomley
  2016-02-02 18:29         ` Bart Van Assche
  2016-02-03 22:38         ` Sebastian Herbszt
  0 siblings, 2 replies; 24+ messages in thread
From: James Bottomley @ 2016-02-02 11:46 UTC (permalink / raw)
  To: Bart Van Assche, Martin K. Petersen
  Cc: Christoph Hellwig, Johannes Thumshirn, Dan Williams,
	Sebastian Herbszt, linux-scsi

On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> On 01/19/16 17:03, James Bottomley wrote:
> > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
> > > > > > > > writes:
> > > 
> > > Bart> Instead of representing the states "visible in sysfs" and
> > > "has
> > > Bart> been removed from the target list" by a single state
> > > variable,
> > > use
> > > Bart> two variables to represent this information.
> > > 
> > > James: Are you happy with the latest iteration of this? Should I
> > > queue
> > > it?
> > 
> > Well, I'm OK with the patch: it's a simple transformation of the
> > enumerated state to a two bit state.  What I can't see is how it
> > fixes
> > any soft lockup.
> > 
> > The only change from the current workflow is that the DEL
> > transition
> > (now the reaped flag) is done before the spin lock is dropped which
> > would fix a tiny window for two threads both trying to remove the
> > same
> > target, but there's nothing that could possibly fix an iterative
> > soft
> > lockup caused by restarting the loop, which is what the changelog
> > says.
> 
> Hello James,
> 
> scsi_remove_target() doesn't lock the scan_mutex which means that 
> concurrent SCSI scanning activity is not prohibited. Such scanning 
> activity can postpone the transition of the state of a SCSI target 
> into STARGET_DEL. I think if the scheduler decides to run the thread 
> that executes scsi_remove_target() on the same CPU as the scanning 
> code after the scanning code has obtained a reap ref and before the 
> scanning code has released the reap ref again that the soft lockup 
> can be triggered that has been reported by Sebastian Herbszt.

OK, I finally understand the scenario;  I'm not sure I understand how
we're getting concurrent scanning and removal from a simple rmmod ... I
take it this is insmod rmmod in a tight loop?

So this patch now actually introduces a problem the other way: we can
do a scan with a dying target, which will lead to problems down the
road.  The original design of the code was to allow the target to be
resurrected even while being removed, because the target doesn't exist
independently of the devices ... when the last device is removed the
target is reaped.  So a test case this would need to pass is adding and
removing a single device on a target in a tight loop.  The problem
you'll see is that eventually the add will fail nastily with your code:
the target can't be resurrected, even though we have a reference and we
find a device to attach, because once we set your reaped flag, the
destruction is irrevocable.

All we really need to break the soft lockup is to not keep looping over
a device that we've called remove on but which hasn't gone into DEL
state.  So how about this.  It will retain a simplistic memory of the
last target and not keep looping over it.  I think it will fix the soft
lockup and preserve the resurrection of the target for the device
add/remove case.

James

---

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 4f18a85..00bc721 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct scsi_target *starget)
 void scsi_remove_target(struct device *dev)
 {
 	struct Scsi_Host *shost = dev_to_shost(dev->parent);
-	struct scsi_target *starget;
+	struct scsi_target *starget, *last_target = NULL;
 	unsigned long flags;
 
 restart:
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_for_each_entry(starget, &shost->__targets, siblings) {
-		if (starget->state == STARGET_DEL)
+		if (starget->state == STARGET_DEL ||
+		    starget == last_target)
 			continue;
 		if (starget->dev.parent == dev || &starget->dev == dev) {
 			kref_get(&starget->reap_ref);
+			last_target = starget;
 			spin_unlock_irqrestore(shost->host_lock, flags);
 			__scsi_remove_target(starget);
 			scsi_target_reap(starget);


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02 11:46       ` James Bottomley
@ 2016-02-02 18:29         ` Bart Van Assche
  2016-02-03  0:43           ` James Bottomley
  2016-02-03 22:38         ` Sebastian Herbszt
  1 sibling, 1 reply; 24+ messages in thread
From: Bart Van Assche @ 2016-02-02 18:29 UTC (permalink / raw)
  To: James Bottomley, Bart Van Assche
  Cc: Martin K. Petersen, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, Sebastian Herbszt, linux-scsi

On 02/02/2016 03:46 AM, James Bottomley wrote:
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 4f18a85..00bc721 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct scsi_target *starget)
>   void scsi_remove_target(struct device *dev)
>   {
>       struct Scsi_Host *shost = dev_to_shost(dev->parent);
> -     struct scsi_target *starget;
> +     struct scsi_target *starget, *last_target = NULL;
>       unsigned long flags;
>
>   restart:
>       spin_lock_irqsave(shost->host_lock, flags);
>       list_for_each_entry(starget, &shost->__targets, siblings) {
> -             if (starget->state == STARGET_DEL)
> +             if (starget->state == STARGET_DEL ||
> +                 starget == last_target)
>                       continue;
>               if (starget->dev.parent == dev || &starget->dev == dev) {
>                       kref_get(&starget->reap_ref);
> +                     last_target = starget;
>                       spin_unlock_irqrestore(shost->host_lock, flags);
>                       __scsi_remove_target(starget);
>                       scsi_target_reap(starget);

Hello James,

Do you think it is a robust approach to store the pointer to the last
removed target in the last_target variable? What if, e.g.,
scsi_target_reap() frees the memory the last_target pointer points at
and another thread then allocates a new scsi_target data structure? Can
that new data structure have the same address as the value stored in the
last_target variable?

Thanks,

Bart.
PLEASE NOTE: The information contained in this electronic mail message is intended only for the use of the designated recipient(s) named above. If the reader of this message is not the intended recipient, you are hereby notified that you have received this message in error and that any review, dissemination, distribution, or copying of this message is strictly prohibited. If you have received this communication in error, please notify the sender by telephone or e-mail (as shown above) immediately and destroy any and all copies of this message in your possession (whether hard copies or electronically stored copies).

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02 18:29         ` Bart Van Assche
@ 2016-02-03  0:43           ` James Bottomley
  2016-02-03  1:17             ` Bart Van Assche
  0 siblings, 1 reply; 24+ messages in thread
From: James Bottomley @ 2016-02-03  0:43 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Martin K. Petersen, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, Sebastian Herbszt, linux-scsi

On Tue, 2016-02-02 at 10:29 -0800, Bart Van Assche wrote:
> On 02/02/2016 03:46 AM, James Bottomley wrote:
> > diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> > index 4f18a85..00bc721 100644
> > --- a/drivers/scsi/scsi_sysfs.c
> > +++ b/drivers/scsi/scsi_sysfs.c
> > @@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct
> > scsi_target *starget)
> >   void scsi_remove_target(struct device *dev)
> >   {
> >       struct Scsi_Host *shost = dev_to_shost(dev->parent);
> > -     struct scsi_target *starget;
> > +     struct scsi_target *starget, *last_target = NULL;
> >       unsigned long flags;
> > 
> >   restart:
> >       spin_lock_irqsave(shost->host_lock, flags);
> >       list_for_each_entry(starget, &shost->__targets, siblings) {
> > -             if (starget->state == STARGET_DEL)
> > +             if (starget->state == STARGET_DEL ||
> > +                 starget == last_target)
> >                       continue;
> >               if (starget->dev.parent == dev || &starget->dev ==
> > dev) {
> >                       kref_get(&starget->reap_ref);
> > +                     last_target = starget;
> >                       spin_unlock_irqrestore(shost->host_lock,
> > flags);
> >                       __scsi_remove_target(starget);
> >                       scsi_target_reap(starget);
> 
> Hello James,
> 
> Do you think it is a robust approach to store the pointer to the last
> removed target in the last_target variable ?

Well, yes, I think it will work, if that's what you mean.

>  What if e.g. scsi_target_reap() frees the memory the last_target 
> pointer points at and another thread reallocates a scsi_target data 
> structure ? Can that last data structure have the same address as the 
> contents of the last_target variable ?

Yes, but it doesn't matter, does it?  Add/Remove has always (and will
always) be racy.  Under current conditions you can still add to the
target after the list_for_each terminates and have scsi_remove_target()
return with attached devices.  The only way to close the race is
basically to forbid scanning as we shut down the host and wait for all
in-progress scans before starting the final removals.

James


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03  0:43           ` James Bottomley
@ 2016-02-03  1:17             ` Bart Van Assche
  2016-02-03  2:01               ` James Bottomley
  0 siblings, 1 reply; 24+ messages in thread
From: Bart Van Assche @ 2016-02-03  1:17 UTC (permalink / raw)
  To: James Bottomley
  Cc: Martin K. Petersen, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, Sebastian Herbszt, linux-scsi

On 02/02/2016 04:43 PM, James Bottomley wrote:
> On Tue, 2016-02-02 at 10:29 -0800, Bart Van Assche wrote:
>> On 02/02/2016 03:46 AM, James Bottomley wrote:
>>> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
>>> index 4f18a85..00bc721 100644
>>> --- a/drivers/scsi/scsi_sysfs.c
>>> +++ b/drivers/scsi/scsi_sysfs.c
>>> @@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct
>>> scsi_target *starget)
>>>    void scsi_remove_target(struct device *dev)
>>>    {
>>>        struct Scsi_Host *shost = dev_to_shost(dev->parent);
>>> -     struct scsi_target *starget;
>>> +     struct scsi_target *starget, *last_target = NULL;
>>>        unsigned long flags;
>>>
>>>    restart:
>>>        spin_lock_irqsave(shost->host_lock, flags);
>>>        list_for_each_entry(starget, &shost->__targets, siblings) {
>>> -             if (starget->state == STARGET_DEL)
>>> +             if (starget->state == STARGET_DEL ||
>>> +                 starget == last_target)
>>>                        continue;
>>>                if (starget->dev.parent == dev || &starget->dev ==
>>> dev) {
>>>                        kref_get(&starget->reap_ref);
>>> +                     last_target = starget;
>>>                        spin_unlock_irqrestore(shost->host_lock,
>>> flags);
>>>                        __scsi_remove_target(starget);
>>>                        scsi_target_reap(starget);
>>
>> Hello James,
>>
>> Do you think it is a robust approach to store the pointer to the last
>> removed target in the last_target variable ?
>
> Well, yes, I think it will work, if that's what you mean.
>
>>   What if e.g. scsi_target_reap() frees the memory the last_target
>> pointer points at and another thread reallocates a scsi_target data
>> structure ? Can that last data structure have the same address as the
>> contents of the last_target variable ?
>
> Yes, but it doesn't matter, does it?  Add/Remove has always (and will
> always) be racy.  Under current conditions you can still add to the
> target after the list_for_each terminates and have scsi_remove_target()
> return with attached devices.  The only way to close the race is
> basically to forbid scanning as we shut down the host and wait for all
> in-progress scans before starting the final removals.

Hello James,

Although the scenario I described is unlikely, if it happens it might be
really hard for someone who has not followed this discussion to figure out
what went wrong. This makes me wonder whether the above patch is really the
best way to fix the reported soft lockup ...

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03  1:17             ` Bart Van Assche
@ 2016-02-03  2:01               ` James Bottomley
  0 siblings, 0 replies; 24+ messages in thread
From: James Bottomley @ 2016-02-03  2:01 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Martin K. Petersen, Christoph Hellwig, Johannes Thumshirn,
	Dan Williams, Sebastian Herbszt, linux-scsi

On Tue, 2016-02-02 at 17:17 -0800, Bart Van Assche wrote:
> On 02/02/2016 04:43 PM, James Bottomley wrote:
> > On Tue, 2016-02-02 at 10:29 -0800, Bart Van Assche wrote:
> > > On 02/02/2016 03:46 AM, James Bottomley wrote:
> > > > diff --git a/drivers/scsi/scsi_sysfs.c
> > > > b/drivers/scsi/scsi_sysfs.c
> > > > index 4f18a85..00bc721 100644
> > > > --- a/drivers/scsi/scsi_sysfs.c
> > > > +++ b/drivers/scsi/scsi_sysfs.c
> > > > @@ -1272,16 +1272,18 @@ static void __scsi_remove_target(struct
> > > > scsi_target *starget)
> > > >    void scsi_remove_target(struct device *dev)
> > > >    {
> > > >        struct Scsi_Host *shost = dev_to_shost(dev->parent);
> > > > -     struct scsi_target *starget;
> > > > +     struct scsi_target *starget, *last_target = NULL;
> > > >        unsigned long flags;
> > > > 
> > > >    restart:
> > > >        spin_lock_irqsave(shost->host_lock, flags);
> > > >        list_for_each_entry(starget, &shost->__targets,
> > > > siblings) {
> > > > -             if (starget->state == STARGET_DEL)
> > > > +             if (starget->state == STARGET_DEL ||
> > > > +                 starget == last_target)
> > > >                        continue;
> > > >                if (starget->dev.parent == dev || &starget->dev
> > > > ==
> > > > dev) {
> > > >                        kref_get(&starget->reap_ref);
> > > > +                     last_target = starget;
> > > >                        spin_unlock_irqrestore(shost->host_lock,
> > > > flags);
> > > >                        __scsi_remove_target(starget);
> > > >                        scsi_target_reap(starget);
> > > 
> > > Hello James,
> > > 
> > > Do you think it is a robust approach to store the pointer to the
> > > last
> > > removed target in the last_target variable ?
> > 
> > Well, yes, I think it will work, if that's what you mean.
> > 
> > >   What if e.g. scsi_target_reap() frees the memory the
> > > last_target
> > > pointer points at and another thread reallocates a scsi_target
> > > data
> > > structure ? Can that last data structure have the same address as
> > > the
> > > contents of the last_target variable ?
> > 
> > Yes, but it doesn't matter, does it?  Add/Remove has always (and
> > will
> > always) be racy.  Under current conditions you can still add to the
> > target after the list_for_each terminates and have
> > scsi_remove_target()
> > return with attached devices.  The only way to close the race is
> > basically to forbid scanning as we shut down the host and wait for
> > all
> > in-progress scans before starting the final removals.
> 
> Hello James,
> 
> Although the scenario I described is unlikely if it happens it might 
> be really hard to figure out what went wrong for someone who has not
> followed this discussion. This makes me wonder whether the above 
> patch is really the best way to fix the reported soft lockup ...

The race you're worrying about exists without the fix, so the fix
doesn't alter the current situation as I explained.  If you see another
issue, please say so (and explain it).

Thanks,

James



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02  1:11       ` Martin K. Petersen
  2016-02-02  9:03         ` Johannes Thumshirn
@ 2016-02-03 17:17         ` Christoph Hellwig
  2016-02-03 17:54           ` James Bottomley
  2016-02-03 21:37         ` *** GMX Spamverdacht *** " Sebastian Herbszt
  2016-02-07 22:48         ` Sebastian Herbszt
  3 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2016-02-03 17:17 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Sebastian Herbszt, James Bottomley, Bart Van Assche,
	Christoph Hellwig, Johannes Thumshirn, Dan Williams, linux-scsi

On Mon, Feb 01, 2016 at 08:11:29PM -0500, Martin K. Petersen wrote:
> I am concerned about queuing something as a stable fix if it is just
> masking a fundamental underlying problem.

It's not masking a fundamental problem.  It fixes the target
state so that we can mark a starget as being under deletion
before we have to drop the lock protecting the target list
iteration.  Independent of any other scanning changes it is the
right thing to do.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03 17:17         ` Christoph Hellwig
@ 2016-02-03 17:54           ` James Bottomley
  0 siblings, 0 replies; 24+ messages in thread
From: James Bottomley @ 2016-02-03 17:54 UTC (permalink / raw)
  To: Christoph Hellwig, Martin K. Petersen
  Cc: Sebastian Herbszt, Bart Van Assche, Johannes Thumshirn,
	Dan Williams, linux-scsi

On Wed, 2016-02-03 at 18:17 +0100, Christoph Hellwig wrote:
> On Mon, Feb 01, 2016 at 08:11:29PM -0500, Martin K. Petersen wrote:
> > I am concerned about queuing something as a stable fix if it is
> > just
> > masking a fundamental underlying problem.
> 
> It's not masking a fundamental problem.  It fixes the target
> state so that we can mark a starget as being under deletion
> before we have to drop the lock protecting the target list
> iteration.  Independent of any other scanning changes it is the
> right thing to do.

It introduces a bug while doing so ... that's a problem.

James



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: *** GMX Spamverdacht *** Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02  1:11       ` Martin K. Petersen
  2016-02-02  9:03         ` Johannes Thumshirn
  2016-02-03 17:17         ` Christoph Hellwig
@ 2016-02-03 21:37         ` Sebastian Herbszt
  2016-02-07 22:48         ` Sebastian Herbszt
  3 siblings, 0 replies; 24+ messages in thread
From: Sebastian Herbszt @ 2016-02-03 21:37 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Bart Van Assche, Christoph Hellwig,
	Johannes Thumshirn, Dan Williams, linux-scsi, Sebastian Herbszt

Martin K. Petersen wrote:
> >>>>> "Sebastian" == Sebastian Herbszt <herbszt@gmx.de> writes:
> 
> >> The only change from the current workflow is that the DEL transition
> >> (now the reaped flag) is done before the spin lock is dropped which
> >> would fix a tiny window for two threads both trying to remove the
> >> same target, but there's nothing that could possibly fix an iterative
> >> soft lockup caused by restarting the loop, which is what the
> >> changelog says.
> 
> Sebastian> James, Martin, what's the status of this patch?  I still hit
> Sebastian> the reported soft lockup on 4.5-rc1.
> 
> And you have verified that Bart's patch applied on top of 4.5-rc1 still
> fixes the lockup? (I know you tested a previous version)

I did not (yet) - will do soon.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02 11:46       ` James Bottomley
  2016-02-02 18:29         ` Bart Van Assche
@ 2016-02-03 22:38         ` Sebastian Herbszt
  2016-02-03 22:55           ` James Bottomley
  2016-02-10 14:05           ` Johannes Thumshirn
  1 sibling, 2 replies; 24+ messages in thread
From: Sebastian Herbszt @ 2016-02-03 22:38 UTC (permalink / raw)
  To: James Bottomley, Dick Kennedy
  Cc: Bart Van Assche, Martin K. Petersen, Christoph Hellwig,
	Johannes Thumshirn, Dan Williams, linux-scsi, Sebastian Herbszt

James Bottomley wrote:
> On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > On 01/19/16 17:03, James Bottomley wrote:
> > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
> > > > > > > > > writes:
> > > > 
> > > > Bart> Instead of representing the states "visible in sysfs" and
> > > > "has
> > > > Bart> been removed from the target list" by a single state
> > > > variable,
> > > > use
> > > > Bart> two variables to represent this information.
> > > > 
> > > > James: Are you happy with the latest iteration of this? Should I
> > > > queue
> > > > it?
> > > 
> > > Well, I'm OK with the patch: it's a simple transformation of the
> > > enumerated state to a two bit state.  What I can't see is how it
> > > fixes
> > > any soft lockup.
> > > 
> > > The only change from the current workflow is that the DEL
> > > transition
> > > (now the reaped flag) is done before the spin lock is dropped which
> > > would fix a tiny window for two threads both trying to remove the
> > > same
> > > target, but there's nothing that could possibly fix an iterative
> > > soft
> > > lockup caused by restarting the loop, which is what the changelog
> > > says.
> > 
> > Hello James,
> > 
> > scsi_remove_target() doesn't lock the scan_mutex which means that 
> > concurrent SCSI scanning activity is not prohibited. Such scanning 
> > activity can postpone the transition of the state of a SCSI target 
> > into STARGET_DEL. I think if the scheduler decides to run the thread 
> > that executes scsi_remove_target() on the same CPU as the scanning 
> > code after the scanning code has obtained a reap ref and before the 
> > scanning code has released the reap ref again that the soft lockup 
> > can be triggered that has been reported by Sebastian Herbszt.
> 
> OK, I finally understand the scenario;  I'm not sure I understand how
> we're getting concurrent scanning and removal from a simple rmmod ... I
> take it this is insmod rmmod in a tight loop?

I am able to trigger the soft lockup with this test case run once:

modprobe lpfc
run fio for 10 seconds
rmmod lpfc

My test setup involves running qla2xxx in target mode (SCST) and
lpfc as initiator on the same system with one exported volume.

Dick, how did you trigger the lockup?

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03 22:38         ` Sebastian Herbszt
@ 2016-02-03 22:55           ` James Bottomley
  2016-02-03 23:28             ` Sebastian Herbszt
  2016-02-07 22:56             ` Sebastian Herbszt
  2016-02-10 14:05           ` Johannes Thumshirn
  1 sibling, 2 replies; 24+ messages in thread
From: James Bottomley @ 2016-02-03 22:55 UTC (permalink / raw)
  To: Sebastian Herbszt, Dick Kennedy
  Cc: Bart Van Assche, Martin K. Petersen, Christoph Hellwig,
	Johannes Thumshirn, Dan Williams, linux-scsi

On Wed, 2016-02-03 at 23:38 +0100, Sebastian Herbszt wrote:
> James Bottomley wrote:
> > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > On 01/19/16 17:03, James Bottomley wrote:
> > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > "Bart" == Bart Van Assche <
> > > > > > > > > > bart.vanassche@sandisk.com>
> > > > > > > > > > writes:
> > > > > 
> > > > > Bart> Instead of representing the states "visible in sysfs"
> > > > > and
> > > > > "has
> > > > > Bart> been removed from the target list" by a single state
> > > > > variable,
> > > > > use
> > > > > Bart> two variables to represent this information.
> > > > > 
> > > > > James: Are you happy with the latest iteration of this?
> > > > > Should I
> > > > > queue
> > > > > it?
> > > > 
> > > > Well, I'm OK with the patch: it's a simple transformation of
> > > > the
> > > > enumerated state to a two bit state.  What I can't see is how
> > > > it
> > > > fixes
> > > > any soft lockup.
> > > > 
> > > > The only change from the current workflow is that the DEL
> > > > transition
> > > > (now the reaped flag) is done before the spin lock is dropped
> > > > which
> > > > would fix a tiny window for two threads both trying to remove
> > > > the
> > > > same
> > > > target, but there's nothing that could possibly fix an
> > > > iterative
> > > > soft
> > > > lockup caused by restarting the loop, which is what the
> > > > changelog
> > > > says.
> > > 
> > > Hello James,
> > > 
> > > scsi_remove_target() doesn't lock the scan_mutex which means that
> > > concurrent SCSI scanning activity is not prohibited. Such
> > > scanning 
> > > activity can postpone the transition of the state of a SCSI
> > > target 
> > > into STARGET_DEL. I think if the scheduler decides to run the
> > > thread 
> > > that executes scsi_remove_target() on the same CPU as the
> > > scanning 
> > > code after the scanning code has obtained a reap ref and before
> > > the 
> > > scanning code has released the reap ref again that the soft
> > > lockup 
> > > can be triggered that has been reported by Sebastian Herbszt.
> > 
> > OK, I finally understand the scenario;  I'm not sure I understand
> > how
> > we're getting concurrent scanning and removal from a simple rmmod
> > ... I
> > take it this is insmod rmmod in a tight loop?
> 
> I am able to trigger the soft lockup with this test case run once:
> 
> modprobe lpfc
> run fio for 10 seconds
> rmmod lpfc
> 
> My test setup involves running qla2xxx in target mode (SCST) and
> lpfc as initiator on the same system with one exported volume.

Hm, that doesn't seem to involve scanning colliding with removal. 
 Probably something else is pinning the target for some reason ...
unless there's some instability or constant change on the FC fabric
itself?  FC is hotplug, so if devices continually appear and disappear,
they'd effectively cause a scan of the added device to take place even
during the rmmod.

Anyway, does the last_target patch fix the issue?

James


> Dick, how did you trigger the lockup?
> 
> Sebastian
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi"
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03 22:55           ` James Bottomley
@ 2016-02-03 23:28             ` Sebastian Herbszt
  2016-02-07 22:56             ` Sebastian Herbszt
  1 sibling, 0 replies; 24+ messages in thread
From: Sebastian Herbszt @ 2016-02-03 23:28 UTC (permalink / raw)
  To: James Bottomley
  Cc: Dick Kennedy, Bart Van Assche, Martin K. Petersen,
	Christoph Hellwig, Johannes Thumshirn, Dan Williams, linux-scsi,
	Sebastian Herbszt

James Bottomley wrote:
> On Wed, 2016-02-03 at 23:38 +0100, Sebastian Herbszt wrote:
> > James Bottomley wrote:
> > > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > > On 01/19/16 17:03, James Bottomley wrote:
> > > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > > "Bart" == Bart Van Assche <
> > > > > > > > > > > bart.vanassche@sandisk.com>
> > > > > > > > > > > writes:
> > > > > > 
> > > > > > Bart> Instead of representing the states "visible in sysfs"
> > > > > > and
> > > > > > "has
> > > > > > Bart> been removed from the target list" by a single state
> > > > > > variable,
> > > > > > use
> > > > > > Bart> two variables to represent this information.
> > > > > > 
> > > > > > James: Are you happy with the latest iteration of this?
> > > > > > Should I
> > > > > > queue
> > > > > > it?
> > > > > 
> > > > > Well, I'm OK with the patch: it's a simple transformation of
> > > > > the
> > > > > enumerated state to a two bit state.  What I can't see is how
> > > > > it
> > > > > fixes
> > > > > any soft lockup.
> > > > > 
> > > > > The only change from the current workflow is that the DEL
> > > > > transition
> > > > > (now the reaped flag) is done before the spin lock is dropped
> > > > > which
> > > > > would fix a tiny window for two threads both trying to remove
> > > > > the
> > > > > same
> > > > > target, but there's nothing that could possibly fix an
> > > > > iterative
> > > > > soft
> > > > > lockup caused by restarting the loop, which is what the
> > > > > changelog
> > > > > says.
> > > > 
> > > > Hello James,
> > > > 
> > > > scsi_remove_target() doesn't lock the scan_mutex which means that
> > > > concurrent SCSI scanning activity is not prohibited. Such
> > > > scanning 
> > > > activity can postpone the transition of the state of a SCSI
> > > > target 
> > > > into STARGET_DEL. I think if the scheduler decides to run the
> > > > thread 
> > > > that executes scsi_remove_target() on the same CPU as the
> > > > scanning 
> > > > code after the scanning code has obtained a reap ref and before
> > > > the 
> > > > scanning code has released the reap ref again that the soft
> > > > lockup 
> > > > can be triggered that has been reported by Sebastian Herbszt.
> > > 
> > > OK, I finally understand the scenario;  I'm not sure I understand
> > > how
> > > we're getting concurrent scanning and removal from a simple rmmod
> > > ... I
> > > take it this is insmod rmmod in a tight loop?
> > 
> > I am able to trigger the soft lockup with this test case run once:
> > 
> > modprobe lpfc
> > run fio for 10 seconds
> > rmmod lpfc
> > 
> > My test setup involves running qla2xxx in target mode (SCST) and
> > lpfc as initiator on the same system with one exported volume.
> 
> Hm, that doesn't seem to involve scanning colliding with removal. 
>  Probably something else is pinning the target for some reason ...
> unless there's some instability or constant change on the FC fabric
> itself?  FC is hotplug, so if devices continually appear and disappear,
> they'd effectively cause a scan of the added device to take place even
> during the rmmod.

No FC fabric involved; there is a direct connection between both HBAs.

> Anyway, does the last_target patch fix the issue?

Will try to test it soon.

> James

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-02  1:11       ` Martin K. Petersen
                           ` (2 preceding siblings ...)
  2016-02-03 21:37         ` *** GMX Spamverdacht *** " Sebastian Herbszt
@ 2016-02-07 22:48         ` Sebastian Herbszt
  3 siblings, 0 replies; 24+ messages in thread
From: Sebastian Herbszt @ 2016-02-07 22:48 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: James Bottomley, Bart Van Assche, Christoph Hellwig,
	Johannes Thumshirn, Dan Williams, linux-scsi, Sebastian Herbszt

Martin K. Petersen wrote:
> >>>>> "Sebastian" == Sebastian Herbszt <herbszt@gmx.de> writes:
> 
> >> The only change from the current workflow is that the DEL transition
> >> (now the reaped flag) is done before the spin lock is dropped which
> >> would fix a tiny window for two threads both trying to remove the
> >> same target, but there's nothing that could possibly fix an iterative
> >> soft lockup caused by restarting the loop, which is what the
> >> changelog says.
> 
> Sebastian> James, Martin, what's the status of this patch?  I still hit
> Sebastian> the reported soft lockup on 4.5-rc1.
> 
> And you have verified that Bart's patch applied on top of 4.5-rc1 still
> fixes the lockup? (I know you tested a previous version)

I now have verified that Bart's patch does fix my soft lockup on 4.5-rc2.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03 22:55           ` James Bottomley
  2016-02-03 23:28             ` Sebastian Herbszt
@ 2016-02-07 22:56             ` Sebastian Herbszt
  1 sibling, 0 replies; 24+ messages in thread
From: Sebastian Herbszt @ 2016-02-07 22:56 UTC (permalink / raw)
  To: James Bottomley
  Cc: Dick Kennedy, Bart Van Assche, Martin K. Petersen,
	Christoph Hellwig, Johannes Thumshirn, Dan Williams, linux-scsi,
	Sebastian Herbszt

James Bottomley wrote:
> On Wed, 2016-02-03 at 23:38 +0100, Sebastian Herbszt wrote:
> > James Bottomley wrote:
> > > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > > On 01/19/16 17:03, James Bottomley wrote:
> > > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > > "Bart" == Bart Van Assche <
> > > > > > > > > > > bart.vanassche@sandisk.com>
> > > > > > > > > > > writes:
> > > > > > 
> > > > > > Bart> Instead of representing the states "visible in sysfs"
> > > > > > and
> > > > > > "has
> > > > > > Bart> been removed from the target list" by a single state
> > > > > > variable,
> > > > > > use
> > > > > > Bart> two variables to represent this information.
> > > > > > 
> > > > > > James: Are you happy with the latest iteration of this?
> > > > > > Should I
> > > > > > queue
> > > > > > it?
> > > > > 
> > > > > Well, I'm OK with the patch: it's a simple transformation of
> > > > > the
> > > > > enumerated state to a two bit state.  What I can't see is how
> > > > > it
> > > > > fixes
> > > > > any soft lockup.
> > > > > 
> > > > > The only change from the current workflow is that the DEL
> > > > > transition
> > > > > (now the reaped flag) is done before the spin lock is dropped
> > > > > which
> > > > > would fix a tiny window for two threads both trying to remove
> > > > > the
> > > > > same
> > > > > target, but there's nothing that could possibly fix an
> > > > > iterative
> > > > > soft
> > > > > lockup caused by restarting the loop, which is what the
> > > > > changelog
> > > > > says.
> > > > 
> > > > Hello James,
> > > > 
> > > > scsi_remove_target() doesn't lock the scan_mutex which means that
> > > > concurrent SCSI scanning activity is not prohibited. Such
> > > > scanning 
> > > > activity can postpone the transition of the state of a SCSI
> > > > target 
> > > > into STARGET_DEL. I think if the scheduler decides to run the
> > > > thread 
> > > > that executes scsi_remove_target() on the same CPU as the
> > > > scanning 
> > > > code after the scanning code has obtained a reap ref and before
> > > > the 
> > > > scanning code has released the reap ref again that the soft
> > > > lockup 
> > > > can be triggered that has been reported by Sebastian Herbszt.
> > > 
> > > OK, I finally understand the scenario;  I'm not sure I understand
> > > how
> > > we're getting concurrent scanning and removal from a simple rmmod
> > > ... I
> > > take it this is insmod rmmod in a tight loop?
> > 
> > I am able to trigger the soft lockup with this test case run once:
> > 
> > modprobe lpfc
> > run fio for 10 seconds
> > rmmod lpfc
> > 
> > My test setup involves running qla2xxx in target mode (SCST) and
> > lpfc as initiator on the same system with one exported volume.
> 
> Hm, that doesn't seem to involve scanning colliding with removal. 
>  Probably something else is pinning the target for some reason ...
> unless there's some instability or constant change on the FC fabric
> itself?  FC is hotplug, so if devices continually appear and disappear,
> they'd effectively cause a scan of the added device to take place even
> during the rmmod.
> 
> Anyway, does the last_target patch fix the issue?
> 
> James

That patch does also fix my soft lockup.

Sebastian

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-03 22:38         ` Sebastian Herbszt
  2016-02-03 22:55           ` James Bottomley
@ 2016-02-10 14:05           ` Johannes Thumshirn
  2016-02-10 15:34             ` James Bottomley
  1 sibling, 1 reply; 24+ messages in thread
From: Johannes Thumshirn @ 2016-02-10 14:05 UTC (permalink / raw)
  To: Martin K. Petersen, James Bottomley, Bart Van Assche
  Cc: Dick Kennedy, Christoph Hellwig, Dan Williams, linux-scsi,
	Sebastian Herbszt, Hannes Reinecke

On Wed, Feb 03, 2016 at 11:38:16PM +0100, Sebastian Herbszt wrote:
> James Bottomley wrote:
> > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > On 01/19/16 17:03, James Bottomley wrote:
> > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > "Bart" == Bart Van Assche <bart.vanassche@sandisk.com>
> > > > > > > > > > writes:
> > > > > 
> > > > > Bart> Instead of representing the states "visible in sysfs" and
> > > > > "has
> > > > > Bart> been removed from the target list" by a single state
> > > > > variable,
> > > > > use
> > > > > Bart> two variables to represent this information.
> > > > > 
> > > > > James: Are you happy with the latest iteration of this? Should I
> > > > > queue
> > > > > it?
> > > > 
> > > > Well, I'm OK with the patch: it's a simple transformation of the
> > > > enumerated state to a two bit state.  What I can't see is how it
> > > > fixes
> > > > any soft lockup.
> > > > 
> > > > The only change from the current workflow is that the DEL
> > > > transition
> > > > (now the reaped flag) is done before the spin lock is dropped which
> > > > would fix a tiny window for two threads both trying to remove the
> > > > same
> > > > target, but there's nothing that could possibly fix an iterative
> > > > soft
> > > > lockup caused by restarting the loop, which is what the changelog
> > > > says.
> > > 
> > > Hello James,
> > > 
> > > scsi_remove_target() doesn't lock the scan_mutex which means that 
> > > concurrent SCSI scanning activity is not prohibited. Such scanning 
> > > activity can postpone the transition of the state of a SCSI target 
> > > into STARGET_DEL. I think if the scheduler decides to run the thread 
> > > that executes scsi_remove_target() on the same CPU as the scanning 
> > > code after the scanning code has obtained a reap ref and before the 
> > > scanning code has released the reap ref again that the soft lockup 
> > > can be triggered that has been reported by Sebastian Herbszt.
> > 
> > OK, I finally understand the scenario;  I'm not sure I understand how
> > we're getting concurrent scanning and removal from a simple rmmod ... I
> > take it this is insmod rmmod in a tight loop?
> 
> I am able to trigger the soft lockup with this test case run once:
> 
> modprobe lpfc
> run fio for 10 seconds
> rmmod lpfc
> 
> My test setup involves running qla2xxx in target mode (SCST) and
> lpfc as initiator on the same system with one exported volume.
> 
> Dick, how did you trigger the lockup?
> 
> Sebastian

Hi James, Bart, Martin

Have you already decided which of the two patches you favour and when it'll
be included?

I have several customer reports that hit this lockup and I don't want to include
one of the patches from the list just to find out the other one is used in
mainline.

Thanks in advance,
	Johannes

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-10 14:05           ` Johannes Thumshirn
@ 2016-02-10 15:34             ` James Bottomley
  2016-02-10 16:06               ` Johannes Thumshirn
  0 siblings, 1 reply; 24+ messages in thread
From: James Bottomley @ 2016-02-10 15:34 UTC (permalink / raw)
  To: Johannes Thumshirn, Martin K. Petersen, Bart Van Assche
  Cc: Dick Kennedy, Christoph Hellwig, Dan Williams, linux-scsi,
	Sebastian Herbszt, Hannes Reinecke

On Wed, 2016-02-10 at 15:05 +0100, Johannes Thumshirn wrote:
> On Wed, Feb 03, 2016 at 11:38:16PM +0100, Sebastian Herbszt wrote:
> > James Bottomley wrote:
> > > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > > On 01/19/16 17:03, James Bottomley wrote:
> > > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > > "Bart" == Bart Van Assche <
> > > > > > > > > > > bart.vanassche@sandisk.com>
> > > > > > > > > > > writes:
> > > > > > 
> > > > > > Bart> Instead of representing the states "visible in sysfs"
> > > > > > and
> > > > > > "has
> > > > > > Bart> been removed from the target list" by a single state
> > > > > > variable,
> > > > > > use
> > > > > > Bart> two variables to represent this information.
> > > > > > 
> > > > > > James: Are you happy with the latest iteration of this?
> > > > > > Should I
> > > > > > queue
> > > > > > it?
> > > > > 
> > > > > Well, I'm OK with the patch: it's a simple transformation of
> > > > > the
> > > > > enumerated state to a two bit state.  What I can't see is how
> > > > > it
> > > > > fixes
> > > > > any soft lockup.
> > > > > 
> > > > > The only change from the current workflow is that the DEL
> > > > > transition
> > > > > (now the reaped flag) is done before the spin lock is dropped
> > > > > which
> > > > > would fix a tiny window for two threads both trying to remove
> > > > > the
> > > > > same
> > > > > target, but there's nothing that could possibly fix an
> > > > > iterative
> > > > > soft
> > > > > lockup caused by restarting the loop, which is what the
> > > > > changelog
> > > > > says.
> > > > 
> > > > Hello James,
> > > > 
> > > > scsi_remove_target() doesn't lock the scan_mutex which means
> > > > that 
> > > > concurrent SCSI scanning activity is not prohibited. Such
> > > > scanning 
> > > > activity can postpone the transition of the state of a SCSI
> > > > target 
> > > > into STARGET_DEL. I think if the scheduler decides to run the
> > > > thread 
> > > > that executes scsi_remove_target() on the same CPU as the
> > > > scanning 
> > > > code after the scanning code has obtained a reap ref and before
> > > > the 
> > > > scanning code has released the reap ref again that the soft
> > > > lockup 
> > > > can be triggered that has been reported by Sebastian Herbszt.
> > > 
> > > OK, I finally understand the scenario;  I'm not sure I understand
> > > how
> > > we're getting concurrent scanning and removal from a simple rmmod
> > > ... I
> > > take it this is insmod rmmod in a tight loop?
> > 
> > I am able to trigger the soft lockup with this test case run once:
> > 
> > modprobe lpfc
> > run fio for 10 seconds
> > rmmod lpfc
> > 
> > My test setup involves running qla2xxx in target mode (SCST) and
> > lpfc as initiator on the same system with one exported volume.
> > 
> > Dick, how did you trigger the lockup?
> > 
> > Sebastian
> 
> Hi James, Bart, Martin
> 
> Have you already decided, which of the two patches you favour and 
> when it'll be included?
> 
> I have several customer reports that hit this lockup and I don't want 
> to include one of the patches from the list just to find out the 
> other one is used in mainline.

Well, unless the target allocation bug gets fixed in Bart's, it will
have to be the last_scan one.  It's more a hack than a fix, but I
suppose it will do as a bandaid in the meantime.

If you have diagnosed this at customers, I'd still like to know what's
holding the devices on removal.

Thanks,

James


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] Separate target visibility from reaped state information
  2016-02-10 15:34             ` James Bottomley
@ 2016-02-10 16:06               ` Johannes Thumshirn
  0 siblings, 0 replies; 24+ messages in thread
From: Johannes Thumshirn @ 2016-02-10 16:06 UTC (permalink / raw)
  To: James Bottomley
  Cc: Martin K. Petersen, Bart Van Assche, Dick Kennedy,
	Christoph Hellwig, Dan Williams, linux-scsi, Sebastian Herbszt,
	Hannes Reinecke

On Wed, Feb 10, 2016 at 07:34:51AM -0800, James Bottomley wrote:
> On Wed, 2016-02-10 at 15:05 +0100, Johannes Thumshirn wrote:
> > On Wed, Feb 03, 2016 at 11:38:16PM +0100, Sebastian Herbszt wrote:
> > > James Bottomley wrote:
> > > > On Mon, 2016-02-01 at 19:43 -0800, Bart Van Assche wrote:
> > > > > On 01/19/16 17:03, James Bottomley wrote:
> > > > > > On Tue, 2016-01-19 at 19:30 -0500, Martin K. Petersen wrote:
> > > > > > > > > > > > "Bart" == Bart Van Assche <
> > > > > > > > > > > > bart.vanassche@sandisk.com>
[..]
> > > 
> > > Sebastian
> > 
> > Hi James, Bart, Martin
> > 
> > Have you already decided, which of the two patches you favour and 
> > when it'll be included?
> > 
> > I have several customer reports that hit this lockup and I don't want 
> > to include one of the patches from the list just to find out the 
> > other one is used in mainline.
> 
> Well, unless the target allocation bug gets fixed in Bart's, it will
> have to be the last_scan one.  It's more a hack than a fix, but I
> suppose it will do as a bandaid in the meantime.
> 
> If you have diagnosed this at customers, I'd still like to know what's
> holding the devices on removal.
> 
> Thanks,
> 
> James

I'll set up a test environment tomorrow and try to get some crash dumps with
the current HEAD.

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-02-10 16:06 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-08 16:51 [PATCH] Separate target visibility from reaped state information Bart Van Assche
2016-01-18  8:55 ` Johannes Thumshirn
2016-01-20  0:30 ` Martin K. Petersen
2016-01-20  1:03   ` James Bottomley
2016-01-31 17:54     ` Sebastian Herbszt
2016-02-02  1:11       ` Martin K. Petersen
2016-02-02  9:03         ` Johannes Thumshirn
2016-02-03 17:17         ` Christoph Hellwig
2016-02-03 17:54           ` James Bottomley
2016-02-03 21:37         ` *** GMX Spamverdacht *** " Sebastian Herbszt
2016-02-07 22:48         ` Sebastian Herbszt
2016-02-02  3:43     ` Bart Van Assche
2016-02-02 11:46       ` James Bottomley
2016-02-02 18:29         ` Bart Van Assche
2016-02-03  0:43           ` James Bottomley
2016-02-03  1:17             ` Bart Van Assche
2016-02-03  2:01               ` James Bottomley
2016-02-03 22:38         ` Sebastian Herbszt
2016-02-03 22:55           ` James Bottomley
2016-02-03 23:28             ` Sebastian Herbszt
2016-02-07 22:56             ` Sebastian Herbszt
2016-02-10 14:05           ` Johannes Thumshirn
2016-02-10 15:34             ` James Bottomley
2016-02-10 16:06               ` Johannes Thumshirn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.