All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] scsi: core: run queue in case of IO queueing failure
@ 2020-07-08 13:14 Ming Lei
  2020-07-14  0:37 ` Ming Lei
  2020-07-18 20:22 ` Bart Van Assche
  0 siblings, 2 replies; 6+ messages in thread
From: Ming Lei @ 2020-07-08 13:14 UTC (permalink / raw)
  To: James Bottomley, linux-scsi, Martin K . Petersen
  Cc: Ming Lei, linux-block, Christoph Hellwig

IO requests may be held in the scheduler queue because of resource contention.
However, unlike normal completion, when queueing a request fails, we don't
ask the block layer to queue these requests again, so an IO hang [1] is caused.

Fix this issue by running the queue when an IO queueing failure happens.

[1] IO hang log produced by running heavy IO while removing a SCSI device

[   39.054963] scsi 13:0:0:0: rejecting I/O to dead device
[   39.058700] scsi 13:0:0:0: rejecting I/O to dead device
[   39.087855] sd 13:0:0:1: [sdd] Synchronizing SCSI cache
[   39.088909] scsi 13:0:0:1: rejecting I/O to dead device
[   39.095351] scsi 13:0:0:1: rejecting I/O to dead device
[   39.096962] scsi 13:0:0:1: rejecting I/O to dead device
[  247.021859] INFO: task scsi-stress-rem:813 blocked for more than 122 seconds.
[  247.023258]       Not tainted 5.8.0-rc2 #8
[  247.024069] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  247.025331] scsi-stress-rem D    0   813    802 0x00004000
[  247.025334] Call Trace:
[  247.025354]  __schedule+0x504/0x55f
[  247.027987]  schedule+0x72/0xa8
[  247.027991]  blk_mq_freeze_queue_wait+0x63/0x8c
[  247.027994]  ? do_wait_intr_irq+0x7a/0x7a
[  247.027996]  blk_cleanup_queue+0x4b/0xc9
[  247.028000]  __scsi_remove_device+0xf6/0x14e
[  247.028002]  scsi_remove_device+0x21/0x2b
[  247.029037]  sdev_store_delete+0x58/0x7c
[  247.029041]  kernfs_fop_write+0x10d/0x14f
[  247.031281]  vfs_write+0xa2/0xdf
[  247.032670]  ksys_write+0x6b/0xb3
[  247.032673]  do_syscall_64+0x56/0x82
[  247.034053]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  247.034059] RIP: 0033:0x7f69f39e9008
[  247.036330] Code: Bad RIP value.
[  247.036331] RSP: 002b:00007ffdd8116498 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  247.037613] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f69f39e9008
[  247.039714] RDX: 0000000000000002 RSI: 000055cde92a0ab0 RDI: 0000000000000001
[  247.039715] RBP: 000055cde92a0ab0 R08: 000000000000000a R09: 00007f69f3a79e80
[  247.039716] R10: 000000000000000a R11: 0000000000000246 R12: 00007f69f3abb780
[  247.039717] R13: 0000000000000002 R14: 00007f69f3ab6740 R15: 0000000000000002

Cc: linux-block@vger.kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/scsi_lib.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 534b85e87c80..4d7fab9e8af9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1694,6 +1694,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 */
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
+
+		/*
+		 * Requests may be held in block layer queue because of
+		 * resource contention. We usually run queue in normal
+		 * completion for queuing these requests again. Block layer
+		 * will finish this failed request simply, run queue in case
+		 * of IO queueing failure so that requests can get chance to
+		 * be finished.
+		 */
+		scsi_run_queue(q);
 		break;
 	}
 	return ret;
-- 
2.25.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] scsi: core: run queue in case of IO queueing failure
  2020-07-08 13:14 [PATCH] scsi: core: run queue in case of IO queueing failure Ming Lei
@ 2020-07-14  0:37 ` Ming Lei
  2020-07-17 14:23   ` Ming Lei
  2020-07-18 20:22 ` Bart Van Assche
  1 sibling, 1 reply; 6+ messages in thread
From: Ming Lei @ 2020-07-14  0:37 UTC (permalink / raw)
  To: James Bottomley, linux-scsi, Martin K . Petersen
  Cc: linux-block, Christoph Hellwig

On Wed, Jul 08, 2020 at 09:14:05PM +0800, Ming Lei wrote:
> IO requests may be held in scheduler queue because of resource contention.
> However, not like normal completion, when queueing request failed, we don't
> ask block layer to queue these requests, so IO hang[1] is caused.
> 
> Fix this issue by run queue when IO request failure happens.
> 
> [1] IO hang log by running heavy IO with removing scsi device
> 
> [   39.054963] scsi 13:0:0:0: rejecting I/O to dead device
> [   39.058700] scsi 13:0:0:0: rejecting I/O to dead device
> [   39.087855] sd 13:0:0:1: [sdd] Synchronizing SCSI cache
> [   39.088909] scsi 13:0:0:1: rejecting I/O to dead device
> [   39.095351] scsi 13:0:0:1: rejecting I/O to dead device
> [   39.096962] scsi 13:0:0:1: rejecting I/O to dead device
> [  247.021859] INFO: task scsi-stress-rem:813 blocked for more than 122 seconds.
> [  247.023258]       Not tainted 5.8.0-rc2 #8
> [  247.024069] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  247.025331] scsi-stress-rem D    0   813    802 0x00004000
> [  247.025334] Call Trace:
> [  247.025354]  __schedule+0x504/0x55f
> [  247.027987]  schedule+0x72/0xa8
> [  247.027991]  blk_mq_freeze_queue_wait+0x63/0x8c
> [  247.027994]  ? do_wait_intr_irq+0x7a/0x7a
> [  247.027996]  blk_cleanup_queue+0x4b/0xc9
> [  247.028000]  __scsi_remove_device+0xf6/0x14e
> [  247.028002]  scsi_remove_device+0x21/0x2b
> [  247.029037]  sdev_store_delete+0x58/0x7c
> [  247.029041]  kernfs_fop_write+0x10d/0x14f
> [  247.031281]  vfs_write+0xa2/0xdf
> [  247.032670]  ksys_write+0x6b/0xb3
> [  247.032673]  do_syscall_64+0x56/0x82
> [  247.034053]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [  247.034059] RIP: 0033:0x7f69f39e9008
> [  247.036330] Code: Bad RIP value.
> [  247.036331] RSP: 002b:00007ffdd8116498 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> [  247.037613] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f69f39e9008
> [  247.039714] RDX: 0000000000000002 RSI: 000055cde92a0ab0 RDI: 0000000000000001
> [  247.039715] RBP: 000055cde92a0ab0 R08: 000000000000000a R09: 00007f69f3a79e80
> [  247.039716] R10: 000000000000000a R11: 0000000000000246 R12: 00007f69f3abb780
> [  247.039717] R13: 0000000000000002 R14: 00007f69f3ab6740 R15: 0000000000000002
> 
> Cc: linux-block@vger.kernel.org
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/scsi/scsi_lib.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 534b85e87c80..4d7fab9e8af9 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1694,6 +1694,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>  		 */
>  		if (req->rq_flags & RQF_DONTPREP)
>  			scsi_mq_uninit_cmd(cmd);
> +
> +		/*
> +		 * Requests may be held in block layer queue because of
> +		 * resource contention. We usually run queue in normal
> +		 * completion for queuing these requests again. Block layer
> +		 * will finish this failed request simply, run queue in case
> +		 * of IO queueing failure so that requests can get chance to
> +		 * be finished.
> +		 */
> +		scsi_run_queue(q);
>  		break;
>  	}
>  	return ret;

Hello Guys,

Ping...


Thanks,
Ming


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] scsi: core: run queue in case of IO queueing failure
  2020-07-14  0:37 ` Ming Lei
@ 2020-07-17 14:23   ` Ming Lei
  0 siblings, 0 replies; 6+ messages in thread
From: Ming Lei @ 2020-07-17 14:23 UTC (permalink / raw)
  To: Ming Lei
  Cc: James Bottomley, Linux SCSI List, Martin K . Petersen,
	linux-block, Christoph Hellwig, Linux Kernel Mailing List

On Tue, Jul 14, 2020 at 8:38 AM Ming Lei <ming.lei@redhat.com> wrote:
>
> On Wed, Jul 08, 2020 at 09:14:05PM +0800, Ming Lei wrote:
> > IO requests may be held in scheduler queue because of resource contention.
> > However, not like normal completion, when queueing request failed, we don't
> > ask block layer to queue these requests, so IO hang[1] is caused.
> >
> > Fix this issue by run queue when IO request failure happens.
> >
> > [1] IO hang log by running heavy IO with removing scsi device
> >
> > [   39.054963] scsi 13:0:0:0: rejecting I/O to dead device
> > [   39.058700] scsi 13:0:0:0: rejecting I/O to dead device
> > [   39.087855] sd 13:0:0:1: [sdd] Synchronizing SCSI cache
> > [   39.088909] scsi 13:0:0:1: rejecting I/O to dead device
> > [   39.095351] scsi 13:0:0:1: rejecting I/O to dead device
> > [   39.096962] scsi 13:0:0:1: rejecting I/O to dead device
> > [  247.021859] INFO: task scsi-stress-rem:813 blocked for more than 122 seconds.
> > [  247.023258]       Not tainted 5.8.0-rc2 #8
> > [  247.024069] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > [  247.025331] scsi-stress-rem D    0   813    802 0x00004000
> > [  247.025334] Call Trace:
> > [  247.025354]  __schedule+0x504/0x55f
> > [  247.027987]  schedule+0x72/0xa8
> > [  247.027991]  blk_mq_freeze_queue_wait+0x63/0x8c
> > [  247.027994]  ? do_wait_intr_irq+0x7a/0x7a
> > [  247.027996]  blk_cleanup_queue+0x4b/0xc9
> > [  247.028000]  __scsi_remove_device+0xf6/0x14e
> > [  247.028002]  scsi_remove_device+0x21/0x2b
> > [  247.029037]  sdev_store_delete+0x58/0x7c
> > [  247.029041]  kernfs_fop_write+0x10d/0x14f
> > [  247.031281]  vfs_write+0xa2/0xdf
> > [  247.032670]  ksys_write+0x6b/0xb3
> > [  247.032673]  do_syscall_64+0x56/0x82
> > [  247.034053]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > [  247.034059] RIP: 0033:0x7f69f39e9008
> > [  247.036330] Code: Bad RIP value.
> > [  247.036331] RSP: 002b:00007ffdd8116498 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> > [  247.037613] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f69f39e9008
> > [  247.039714] RDX: 0000000000000002 RSI: 000055cde92a0ab0 RDI: 0000000000000001
> > [  247.039715] RBP: 000055cde92a0ab0 R08: 000000000000000a R09: 00007f69f3a79e80
> > [  247.039716] R10: 000000000000000a R11: 0000000000000246 R12: 00007f69f3abb780
> > [  247.039717] R13: 0000000000000002 R14: 00007f69f3ab6740 R15: 0000000000000002
> >
> > Cc: linux-block@vger.kernel.org
> > Cc: Christoph Hellwig <hch@lst.de>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/scsi/scsi_lib.c | 10 ++++++++++
> >  1 file changed, 10 insertions(+)
> >
> > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> > index 534b85e87c80..4d7fab9e8af9 100644
> > --- a/drivers/scsi/scsi_lib.c
> > +++ b/drivers/scsi/scsi_lib.c
> > @@ -1694,6 +1694,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
> >                */
> >               if (req->rq_flags & RQF_DONTPREP)
> >                       scsi_mq_uninit_cmd(cmd);
> > +
> > +             /*
> > +              * Requests may be held in block layer queue because of
> > +              * resource contention. We usually run queue in normal
> > +              * completion for queuing these requests again. Block layer
> > +              * will finish this failed request simply, run queue in case
> > +              * of IO queueing failure so that requests can get chance to
> > +              * be finished.
> > +              */
> > +             scsi_run_queue(q);
> >               break;
> >       }
> >       return ret;
>
> Hello Guys,
>
> Ping...

Ping again...


-- 
Ming Lei

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] scsi: core: run queue in case of IO queueing failure
  2020-07-08 13:14 [PATCH] scsi: core: run queue in case of IO queueing failure Ming Lei
  2020-07-14  0:37 ` Ming Lei
@ 2020-07-18 20:22 ` Bart Van Assche
  2020-07-20  1:32   ` Ming Lei
  1 sibling, 1 reply; 6+ messages in thread
From: Bart Van Assche @ 2020-07-18 20:22 UTC (permalink / raw)
  To: Ming Lei, James Bottomley, linux-scsi, Martin K . Petersen
  Cc: linux-block, Christoph Hellwig

On 2020-07-08 06:14, Ming Lei wrote:
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 534b85e87c80..4d7fab9e8af9 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1694,6 +1694,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>  		 */
>  		if (req->rq_flags & RQF_DONTPREP)
>  			scsi_mq_uninit_cmd(cmd);
> +
> +		/*
> +		 * Requests may be held in block layer queue because of
> +		 * resource contention. We usually run queue in normal
> +		 * completion for queuing these requests again. Block layer
> +		 * will finish this failed request simply, run queue in case
> +		 * of IO queueing failure so that requests can get chance to
> +		 * be finished.
> +		 */
> +		scsi_run_queue(q);
>  		break;
>  	}
>  	return ret;

So this patch causes blk_mq_run_hw_queues() to be called synchronously
from inside blk_mq_run_hw_queues()? Wouldn't it be better to avoid such
recursion and to run the queue asynchronously instead of synchronously
from inside scsi_queue_rq()? The following code already exists in
scsi_end_request():

	blk_mq_run_hw_queues(q, true);

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] scsi: core: run queue in case of IO queueing failure
  2020-07-18 20:22 ` Bart Van Assche
@ 2020-07-20  1:32   ` Ming Lei
  2020-07-20  2:26     ` Bart Van Assche
  0 siblings, 1 reply; 6+ messages in thread
From: Ming Lei @ 2020-07-20  1:32 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: James Bottomley, linux-scsi, Martin K . Petersen, linux-block,
	Christoph Hellwig

On Sat, Jul 18, 2020 at 01:22:27PM -0700, Bart Van Assche wrote:
> On 2020-07-08 06:14, Ming Lei wrote:
> > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> > index 534b85e87c80..4d7fab9e8af9 100644
> > --- a/drivers/scsi/scsi_lib.c
> > +++ b/drivers/scsi/scsi_lib.c
> > @@ -1694,6 +1694,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
> >  		 */
> >  		if (req->rq_flags & RQF_DONTPREP)
> >  			scsi_mq_uninit_cmd(cmd);
> > +
> > +		/*
> > +		 * Requests may be held in block layer queue because of
> > +		 * resource contention. We usually run queue in normal
> > +		 * completion for queuing these requests again. Block layer
> > +		 * will finish this failed request simply, run queue in case
> > +		 * of IO queueing failure so that requests can get chance to
> > +		 * be finished.
> > +		 */
> > +		scsi_run_queue(q);
> >  		break;
> >  	}
> >  	return ret;
> 
> So this patch causes blk_mq_run_hw_queues() to be called synchronously
> from inside blk_mq_run_hw_queues()? Wouldn't it be better to avoid such

OK, it looks like this patch may risk overflowing the stack.

> recursion and to run the queue asynchronously instead of synchronously
> from inside scsi_queue_rq()? The following code already exists in
> scsi_end_request():
> 
> 	blk_mq_run_hw_queues(q, true);

How about the following change?

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b9adee0a9266..9798fbffe307 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -564,6 +564,15 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
 	scsi_uninit_cmd(cmd);
 }
 
+static void scsi_run_queue_async(struct scsi_device *sdev)
+{
+	if (scsi_target(sdev)->single_lun ||
+	    !list_empty(&sdev->host->starved_list))
+		kblockd_schedule_work(&sdev->requeue_work);
+	else
+		blk_mq_run_hw_queues(sdev->request_queue, true);
+}
+
 /* Returns false when no more bytes to process, true if there are more */
 static bool scsi_end_request(struct request *req, blk_status_t error,
 		unsigned int bytes)
@@ -608,11 +617,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
 
 	__blk_mq_end_request(req, error);
 
-	if (scsi_target(sdev)->single_lun ||
-	    !list_empty(&sdev->host->starved_list))
-		kblockd_schedule_work(&sdev->requeue_work);
-	else
-		blk_mq_run_hw_queues(q, true);
+	scsi_run_queue_async(sdev);
 
 	percpu_ref_put(&q->q_usage_counter);
 	return false;
@@ -1721,6 +1726,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 */
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
+		scsi_run_queue_async(sdev);
 		break;
 	}
 	return ret;


Thanks,
Ming


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] scsi: core: run queue in case of IO queueing failure
  2020-07-20  1:32   ` Ming Lei
@ 2020-07-20  2:26     ` Bart Van Assche
  0 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2020-07-20  2:26 UTC (permalink / raw)
  To: Ming Lei
  Cc: James Bottomley, linux-scsi, Martin K . Petersen, linux-block,
	Christoph Hellwig

On 2020-07-19 18:32, Ming Lei wrote:
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index b9adee0a9266..9798fbffe307 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -564,6 +564,15 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
>  	scsi_uninit_cmd(cmd);
>  }
>  
> +static void scsi_run_queue_async(struct scsi_device *sdev)
> +{
> +	if (scsi_target(sdev)->single_lun ||
> +	    !list_empty(&sdev->host->starved_list))
> +		kblockd_schedule_work(&sdev->requeue_work);
> +	else
> +		blk_mq_run_hw_queues(sdev->request_queue, true);
> +}
> +
>  /* Returns false when no more bytes to process, true if there are more */
>  static bool scsi_end_request(struct request *req, blk_status_t error,
>  		unsigned int bytes)
> @@ -608,11 +617,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
>  
>  	__blk_mq_end_request(req, error);
>  
> -	if (scsi_target(sdev)->single_lun ||
> -	    !list_empty(&sdev->host->starved_list))
> -		kblockd_schedule_work(&sdev->requeue_work);
> -	else
> -		blk_mq_run_hw_queues(q, true);
> +	scsi_run_queue_async(sdev);
>  
>  	percpu_ref_put(&q->q_usage_counter);
>  	return false;
> @@ -1721,6 +1726,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>  		 */
>  		if (req->rq_flags & RQF_DONTPREP)
>  			scsi_mq_uninit_cmd(cmd);
> +		scsi_run_queue_async(sdev);
>  		break;
>  	}
>  	return ret;

Looks good to me. Feel free to add:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-07-20  2:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-08 13:14 [PATCH] scsi: core: run queue in case of IO queueing failure Ming Lei
2020-07-14  0:37 ` Ming Lei
2020-07-17 14:23   ` Ming Lei
2020-07-18 20:22 ` Bart Van Assche
2020-07-20  1:32   ` Ming Lei
2020-07-20  2:26     ` Bart Van Assche

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.