All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
@ 2016-05-31  8:38 Wei Fang
  2016-05-31  8:38 ` [PATCH v2 2/2] Documentation/scsi: update scsi_eh.txt about ->host_failed Wei Fang
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Wei Fang @ 2016-05-31  8:38 UTC (permalink / raw)
  To: tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc, Wei Fang

sas_ata_strategy_handler() adds the works of the ata error handler
to system_unbound_wq. This workqueue asynchronously runs work items,
so the ata error handler will be performed concurrently on different
CPUs. In this case, ->host_failed will be decreased simultaneously in
scsi_eh_finish_cmd() on different CPUs, and become abnormal.

It will lead to permanently inequal between ->host_failed and
 ->host_busy, and scsi error handler thread won't become running.
IO errors after that won't be handled forever.

Use atomic type for ->host_failed to fix this race.

This fixes the problem introduced in
commit 50824d6c5657 ("[SCSI] libsas: async ata-eh").

Reviewed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Wei Fang <fangwei1@huawei.com>
---
 drivers/ata/libata-eh.c             |    2 +-
 drivers/scsi/libsas/sas_scsi_host.c |    5 +++--
 drivers/scsi/scsi.c                 |    2 +-
 drivers/scsi/scsi_error.c           |   15 +++++++++------
 drivers/scsi/scsi_lib.c             |    3 ++-
 include/scsi/scsi_host.h            |    2 +-
 6 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 961acc7..a0e7612 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	ata_scsi_port_error_handler(host, ap);
 
 	/* finish or retry handled scmd's and clean up */
-	WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+	WARN_ON(atomic_read(&host->host_failed) || !list_empty(&eh_work_q));
 
 	DPRINTK("EXIT\n");
 }
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 519dac4..8d74003 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -757,7 +757,8 @@ retry:
 	spin_unlock_irq(shost->host_lock);
 
 	SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
-		    __func__, atomic_read(&shost->host_busy), shost->host_failed);
+		    __func__, atomic_read(&shost->host_busy),
+		    atomic_read(&shost->host_failed));
 	/*
 	 * Deal with commands that still have SAS tasks (i.e. they didn't
 	 * complete via the normal sas_task completion mechanism),
@@ -800,7 +801,7 @@ out:
 
 	SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
 		    __func__, atomic_read(&shost->host_busy),
-		    shost->host_failed, tries);
+		    atomic_read(&shost->host_failed), tries);
 }
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1deb6ad..7840915 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -527,7 +527,7 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 				scmd_printk(KERN_INFO, cmd,
 					    "scsi host busy %d failed %d\n",
 					    atomic_read(&cmd->device->host->host_busy),
-					    cmd->device->host->host_failed);
+					    atomic_read(&cmd->device->host->host_failed));
 		}
 	}
 }
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 984ddcb..f37666f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -62,7 +62,8 @@ static int scsi_try_to_abort_cmd(struct scsi_host_template *,
 /* called with shost->host_lock held */
 void scsi_eh_wakeup(struct Scsi_Host *shost)
 {
-	if (atomic_read(&shost->host_busy) == shost->host_failed) {
+	if (atomic_read(&shost->host_busy) ==
+	    atomic_read(&shost->host_failed)) {
 		trace_scsi_eh_wakeup(shost);
 		wake_up_process(shost->ehandler);
 		SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
@@ -250,7 +251,7 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
 		eh_flag &= ~SCSI_EH_CANCEL_CMD;
 	scmd->eh_eflags |= eh_flag;
 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-	shost->host_failed++;
+	atomic_inc(&shost->host_failed);
 	scsi_eh_wakeup(shost);
  out_unlock:
 	spin_unlock_irqrestore(shost->host_lock, flags);
@@ -1127,7 +1128,7 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
  */
 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 {
-	scmd->device->host->host_failed--;
+	atomic_dec(&scmd->device->host->host_failed);
 	scmd->eh_eflags = 0;
 	list_move_tail(&scmd->eh_entry, done_q);
 }
@@ -2190,8 +2191,10 @@ int scsi_error_handler(void *data)
 		if (kthread_should_stop())
 			break;
 
-		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
-		    shost->host_failed != atomic_read(&shost->host_busy)) {
+		if ((atomic_read(&shost->host_failed) == 0 &&
+		     shost->host_eh_scheduled == 0) ||
+		    (atomic_read(&shost->host_failed) !=
+		     atomic_read(&shost->host_busy))) {
 			SCSI_LOG_ERROR_RECOVERY(1,
 				shost_printk(KERN_INFO, shost,
 					     "scsi_eh_%d: sleeping\n",
@@ -2205,7 +2208,7 @@ int scsi_error_handler(void *data)
 			shost_printk(KERN_INFO, shost,
 				     "scsi_eh_%d: waking up %d/%d/%d\n",
 				     shost->host_no, shost->host_eh_scheduled,
-				     shost->host_failed,
+				     atomic_read(&shost->host_failed),
 				     atomic_read(&shost->host_busy)));
 
 		/*
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8106515..fb3cc5d 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -318,7 +318,8 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 		atomic_dec(&starget->target_busy);
 
 	if (unlikely(scsi_host_in_recovery(shost) &&
-		     (shost->host_failed || shost->host_eh_scheduled))) {
+		     (atomic_read(&shost->host_failed) ||
+		      shost->host_eh_scheduled))) {
 		spin_lock_irqsave(shost->host_lock, flags);
 		scsi_eh_wakeup(shost);
 		spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index fcfa3d7..654435f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -576,7 +576,7 @@ struct Scsi_Host {
 	atomic_t host_busy;		   /* commands actually active on low-level */
 	atomic_t host_blocked;
 
-	unsigned int host_failed;	   /* commands that failed.
+	atomic_t host_failed;		   /* commands that failed.
 					      protected by host_lock */
 	unsigned int host_eh_scheduled;    /* EH scheduled without command */
     
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] Documentation/scsi: update scsi_eh.txt about ->host_failed
  2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
@ 2016-05-31  8:38 ` Wei Fang
  2016-05-31 14:33 ` [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Tejun Heo
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Wei Fang @ 2016-05-31  8:38 UTC (permalink / raw)
  To: tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc, Wei Fang

Update the new concurrency rules of ->host_failed.

Signed-off-by: Wei Fang <fangwei1@huawei.com>
---
 Documentation/scsi/scsi_eh.txt |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 8638f61..e6d0de2 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -252,7 +252,7 @@ scmd->allowed.
 	- set scmd->eh_eflags
 	- add scmd to shost->eh_cmd_q
 	- set SHOST_RECOVERY
-	- shost->host_failed++
+	- atomic_inc(&shost->host_failed)
     LOCKING: shost->host_lock
 
  2. EH starts
@@ -263,7 +263,7 @@ scmd->allowed.
 
  3. scmd recovered
     ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
-	- shost->host_failed--
+	- atomic_dec(&shost->host_failed)
 	- clear scmd->eh_eflags
 	- scsi_setup_cmd_retry()
 	- move from local eh_work_q to local eh_done_q
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
  2016-05-31  8:38 ` [PATCH v2 2/2] Documentation/scsi: update scsi_eh.txt about ->host_failed Wei Fang
@ 2016-05-31 14:33 ` Tejun Heo
  2016-06-01  1:20   ` Wei Fang
  2016-06-01  3:21 ` Dan Williams
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Tejun Heo @ 2016-05-31 14:33 UTC (permalink / raw)
  To: Wei Fang
  Cc: jejb, martin.petersen, corbet, hch, dan.j.williams, linux-ide,
	linux-scsi, linux-doc

On Tue, May 31, 2016 at 04:38:17PM +0800, Wei Fang wrote:
> sas_ata_strategy_handler() adds the works of the ata error handler
> to system_unbound_wq. This workqueue asynchronously runs work items,

Are there more than one error handling work items per host?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-05-31 14:33 ` [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Tejun Heo
@ 2016-06-01  1:20   ` Wei Fang
  0 siblings, 0 replies; 14+ messages in thread
From: Wei Fang @ 2016-06-01  1:20 UTC (permalink / raw)
  To: Tejun Heo
  Cc: jejb, martin.petersen, corbet, hch, dan.j.williams, linux-ide,
	linux-scsi, linux-doc

Hi, Tejun,

On 2016/5/31 22:33, Tejun Heo wrote:
> On Tue, May 31, 2016 at 04:38:17PM +0800, Wei Fang wrote:
>> sas_ata_strategy_handler() adds the works of the ata error handler
>> to system_unbound_wq. This workqueue asynchronously runs work items,
> 
> Are there more than one error handling work items per host?

The ata error handler here means async_sas_ata_eh(), every port will
execute it's own async_sas_ata_eh() in sas_ata_strategy_handler().

Thanks,
Wei

> 
> Thanks.
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
  2016-05-31  8:38 ` [PATCH v2 2/2] Documentation/scsi: update scsi_eh.txt about ->host_failed Wei Fang
  2016-05-31 14:33 ` [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Tejun Heo
@ 2016-06-01  3:21 ` Dan Williams
  2016-06-01  3:30   ` Dan Williams
  2016-06-01 14:06 ` James Bottomley
  2016-06-01 14:36 ` Kevin Groeneveld
  4 siblings, 1 reply; 14+ messages in thread
From: Dan Williams @ 2016-06-01  3:21 UTC (permalink / raw)
  To: Wei Fang
  Cc: Tejun Heo, jejb, Martin K. Petersen, Jonathan Corbet,
	Christoph Hellwig, IDE/ATA development list, linux-scsi,
	linux-doc

On Tue, May 31, 2016 at 1:38 AM, Wei Fang <fangwei1@huawei.com> wrote:
> sas_ata_strategy_handler() adds the works of the ata error handler
> to system_unbound_wq. This workqueue asynchronously runs work items,
> so the ata error handler will be performed concurrently on different
> CPUs. In this case, ->host_failed will be decreased simultaneously in
> scsi_eh_finish_cmd() on different CPUs, and become abnormal.
>
> It will lead to permanently inequal between ->host_failed and
>  ->host_busy, and scsi error handler thread won't become running.
> IO errors after that won't be handled forever.
>
> Use atomic type for ->host_failed to fix this race.
>
> This fixes the problem introduced in
> commit 50824d6c5657 ("[SCSI] libsas: async ata-eh").
>
> Reviewed-by: Christoph Hellwig <hch@infradead.org>
> Signed-off-by: Wei Fang <fangwei1@huawei.com>

Acked-by: Dan Williams <dan.j.williams@intel.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-01  3:21 ` Dan Williams
@ 2016-06-01  3:30   ` Dan Williams
  0 siblings, 0 replies; 14+ messages in thread
From: Dan Williams @ 2016-06-01  3:30 UTC (permalink / raw)
  To: Wei Fang
  Cc: Tejun Heo, jejb, Martin K. Petersen, Jonathan Corbet,
	Christoph Hellwig, IDE/ATA development list, linux-scsi,
	linux-doc

On Tue, May 31, 2016 at 8:21 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Tue, May 31, 2016 at 1:38 AM, Wei Fang <fangwei1@huawei.com> wrote:
>> sas_ata_strategy_handler() adds the works of the ata error handler
>> to system_unbound_wq. This workqueue asynchronously runs work items,
>> so the ata error handler will be performed concurrently on different
>> CPUs. In this case, ->host_failed will be decreased simultaneously in
>> scsi_eh_finish_cmd() on different CPUs, and become abnormal.
>>
>> It will lead to permanently inequal between ->host_failed and
>>  ->host_busy, and scsi error handler thread won't become running.
>> IO errors after that won't be handled forever.
>>
>> Use atomic type for ->host_failed to fix this race.
>>
>> This fixes the problem introduced in
>> commit 50824d6c5657 ("[SCSI] libsas: async ata-eh").
>>
>> Reviewed-by: Christoph Hellwig <hch@infradead.org>
>> Signed-off-by: Wei Fang <fangwei1@huawei.com>
>
> Acked-by: Dan Williams <dan.j.williams@intel.com>

Should also tag this for -stable.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
                   ` (2 preceding siblings ...)
  2016-06-01  3:21 ` Dan Williams
@ 2016-06-01 14:06 ` James Bottomley
  2016-06-02  1:58   ` Wei Fang
  2016-06-01 14:36 ` Kevin Groeneveld
  4 siblings, 1 reply; 14+ messages in thread
From: James Bottomley @ 2016-06-01 14:06 UTC (permalink / raw)
  To: Wei Fang, tj, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

On Tue, 2016-05-31 at 16:38 +0800, Wei Fang wrote:
> sas_ata_strategy_handler() adds the works of the ata error handler
> to system_unbound_wq. This workqueue asynchronously runs work items,
> so the ata error handler will be performed concurrently on different
> CPUs. In this case, ->host_failed will be decreased simultaneously in
> scsi_eh_finish_cmd() on different CPUs, and become abnormal.
> 
> It will lead to permanently inequal between ->host_failed and
>  ->host_busy, and scsi error handler thread won't become running.
> IO errors after that won't be handled forever.
> 
> Use atomic type for ->host_failed to fix this race.

As I said previously, you don't need atomics to do this, could you just
remove the decrement in scsi_eh_finish_command() and zero the counter
after the strategy handler completes.

Thanks,

James



^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
                   ` (3 preceding siblings ...)
  2016-06-01 14:06 ` James Bottomley
@ 2016-06-01 14:36 ` Kevin Groeneveld
  2016-06-01 15:29   ` Bart Van Assche
  2016-06-02  2:37   ` Wei Fang
  4 siblings, 2 replies; 14+ messages in thread
From: Kevin Groeneveld @ 2016-06-01 14:36 UTC (permalink / raw)
  To: Wei Fang, tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

> Subject: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed

I wonder if this could be related to http://www.spinics.net/lists/linux-scsi/msg86808.html?

I never did get to the bottom of that.  If I have time I hope to retest my scsi hang issue with this patch.


Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-01 14:36 ` Kevin Groeneveld
@ 2016-06-01 15:29   ` Bart Van Assche
  2016-06-01 15:38     ` James Bottomley
  2016-06-02  2:37   ` Wei Fang
  1 sibling, 1 reply; 14+ messages in thread
From: Bart Van Assche @ 2016-06-01 15:29 UTC (permalink / raw)
  To: Kevin Groeneveld, Wei Fang, tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

On 06/01/2016 07:36 AM, Kevin Groeneveld wrote:
>> Subject: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
>
> I wonder if this could be related to http://www.spinics.net/lists/linux-scsi/msg86808.html?
>
> I never did get to the bottom of that.  If I have time I hope to retest my scsi hang issue with this patch.

I've never seen that hang with the ib_srp driver (which is a SCSI LLD). 
That probably means that the SATA code uses the SCSI error handler in 
another way than other SCSI LLDs.

Bart.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-01 15:29   ` Bart Van Assche
@ 2016-06-01 15:38     ` James Bottomley
  0 siblings, 0 replies; 14+ messages in thread
From: James Bottomley @ 2016-06-01 15:38 UTC (permalink / raw)
  To: Bart Van Assche, Kevin Groeneveld, Wei Fang, tj, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

On Wed, 2016-06-01 at 08:29 -0700, Bart Van Assche wrote:
> On 06/01/2016 07:36 AM, Kevin Groeneveld wrote:
> > > Subject: [PATCH v2 1/2] scsi: fix race between simultaneous
> > > decrements of ->host_failed
> > 
> > I wonder if this could be related to 
> > http://www.spinics.net/lists/linux-scsi/msg86808.html?
> > 
> > I never did get to the bottom of that.  If I have time I hope to
> > retest my scsi hang issue with this patch.
> 
> I've never seen that hang with the ib_srp driver (which is a SCSI 
> LLD). That probably means that the SATA code uses the SCSI error 
> handler in another way than other SCSI LLDs.

It's the ata_sas error handler, which is split between libsas and
libata.  It does one thread of execution per ata port when the strategy
handler is invoked.  This behaviour is pretty much unique at the
moment.

James



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-01 14:06 ` James Bottomley
@ 2016-06-02  1:58   ` Wei Fang
  0 siblings, 0 replies; 14+ messages in thread
From: Wei Fang @ 2016-06-02  1:58 UTC (permalink / raw)
  To: James Bottomley, tj, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

Hi, James,

On 2016/6/1 22:06, James Bottomley wrote:
> On Tue, 2016-05-31 at 16:38 +0800, Wei Fang wrote:
>> sas_ata_strategy_handler() adds the works of the ata error handler
>> to system_unbound_wq. This workqueue asynchronously runs work items,
>> so the ata error handler will be performed concurrently on different
>> CPUs. In this case, ->host_failed will be decreased simultaneously in
>> scsi_eh_finish_cmd() on different CPUs, and become abnormal.
>>
>> It will lead to permanently inequal between ->host_failed and
>>  ->host_busy, and scsi error handler thread won't become running.
>> IO errors after that won't be handled forever.
>>
>> Use atomic type for ->host_failed to fix this race.
> 
> As I said previously, you don't need atomics to do this, could you just
> remove the decrement in scsi_eh_finish_command() and zero the counter
> after the strategy handler completes.
> 

OK, I'll send v3 later.

Thanks,
Wei


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-01 14:36 ` Kevin Groeneveld
  2016-06-01 15:29   ` Bart Van Assche
@ 2016-06-02  2:37   ` Wei Fang
  2016-06-02  3:09     ` Wei Fang
  2016-06-06 12:48     ` Kevin Groeneveld
  1 sibling, 2 replies; 14+ messages in thread
From: Wei Fang @ 2016-06-02  2:37 UTC (permalink / raw)
  To: Kevin Groeneveld, tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

Hi, Kevin,

On 2016/6/1 22:36, Kevin Groeneveld wrote:
>> Subject: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
> 
> I wonder if this could be related to http://www.spinics.net/lists/linux-scsi/msg86808.html?
> 
> I never did get to the bottom of that.  If I have time I hope to retest my scsi hang issue with this patch.
> 

The concurrently decrements of host_failed only lead to abnormal
of host_failed, host_busy will be zero after error handler, and
the result may be host_failed > host_busy forever. But in your
case, host_busy > host_failed, so I think it's not the same
case. I'm afraid that this patch can't fix your scsi hang issue.

Thanks,
Wei


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-02  2:37   ` Wei Fang
@ 2016-06-02  3:09     ` Wei Fang
  2016-06-06 12:48     ` Kevin Groeneveld
  1 sibling, 0 replies; 14+ messages in thread
From: Wei Fang @ 2016-06-02  3:09 UTC (permalink / raw)
  To: Kevin Groeneveld, tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc


On 2016/6/2 10:37, Wei Fang wrote:
> Hi, Kevin,
> 
> On 2016/6/1 22:36, Kevin Groeneveld wrote:
>>> Subject: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
>>
>> I wonder if this could be related to http://www.spinics.net/lists/linux-scsi/msg86808.html?
>>
>> I never did get to the bottom of that.  If I have time I hope to retest my scsi hang issue with this patch.
>>
> 
> The concurrently decrements of host_failed only lead to abnormal
> of host_failed, host_busy will be zero after error handler, and
> the result may be host_failed > host_busy forever. But in your
> case, host_busy > host_failed, so I think it's not the same
> case. I'm afraid that this patch can't fix your scsi hang issue.

Something wrong in my words. host_busy may not be zero after error
handler, but the result is true that missing decrement of host_failed
may lead to host_failed > host_busy.

Thanks,
Wei


^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed
  2016-06-02  2:37   ` Wei Fang
  2016-06-02  3:09     ` Wei Fang
@ 2016-06-06 12:48     ` Kevin Groeneveld
  1 sibling, 0 replies; 14+ messages in thread
From: Kevin Groeneveld @ 2016-06-06 12:48 UTC (permalink / raw)
  To: Wei Fang, tj, jejb, martin.petersen, corbet
  Cc: hch, dan.j.williams, linux-ide, linux-scsi, linux-doc

-----Original Message-----
From: Wei Fang [mailto:fangwei1@huawei.com] 
Sent: June-01-16 10:37 PM

>The concurrently decrements of host_failed only lead to abnormal of host_failed, host_busy will be zero after error handler, and >the result may be host_failed > host_busy forever. But in your case, host_busy > host_failed, so I think it's not the same case. I'm >afraid that this patch can't fix your scsi hang issue.

I tested the patch and you (and others also suggested similar) are correct that it does not fix the error handling issue I am having with port multipliers and CD-ROM+HDD.


Kevin

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2016-06-06 12:48 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-31  8:38 [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Wei Fang
2016-05-31  8:38 ` [PATCH v2 2/2] Documentation/scsi: update scsi_eh.txt about ->host_failed Wei Fang
2016-05-31 14:33 ` [PATCH v2 1/2] scsi: fix race between simultaneous decrements of ->host_failed Tejun Heo
2016-06-01  1:20   ` Wei Fang
2016-06-01  3:21 ` Dan Williams
2016-06-01  3:30   ` Dan Williams
2016-06-01 14:06 ` James Bottomley
2016-06-02  1:58   ` Wei Fang
2016-06-01 14:36 ` Kevin Groeneveld
2016-06-01 15:29   ` Bart Van Assche
2016-06-01 15:38     ` James Bottomley
2016-06-02  2:37   ` Wei Fang
2016-06-02  3:09     ` Wei Fang
2016-06-06 12:48     ` Kevin Groeneveld

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.