All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] block: mq-deadline: Fix queue restart handling
@ 2019-10-05  3:03 Damien Le Moal
  2019-10-06  0:01 ` Sasha Levin
  0 siblings, 1 reply; 7+ messages in thread
From: Damien Le Moal @ 2019-10-05  3:03 UTC (permalink / raw)
  To: stable, Greg Kroah-Hartman; +Cc: Jens Axboe

[ Upstream commit cb8acabbe33b110157955a7425ee876fb81e6bbc ]

Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
handling") added a call to blk_mq_sched_mark_restart_hctx() in
dd_dispatch_request() to make sure that write request dispatching does
not stall when all target zones are locked. This fix left a subtle race
when a write completion happens during a dispatch execution on another
CPU:

CPU 0: Dispatch			CPU 1: write completion

dd_dispatch_request()
    lock(&dd->lock);
    ...
    lock(&dd->zone_lock);	dd_finish_request()
    rq = find request		lock(&dd->zone_lock);
    unlock(&dd->zone_lock);
    				zone write unlock
				unlock(&dd->zone_lock);
				...
				__blk_mq_free_request
                                      check restart flag (not set)
				      -> queue not run
    ...
    if (!rq && have writes)
        blk_mq_sched_mark_restart_hctx()
    unlock(&dd->lock)

Since the dispatch context finishes after the write request completion
handling, marking the queue as needing a restart is not seen from
__blk_mq_free_request(), and blk_mq_sched_restart() is not executed,
leading to a dispatch stall under 100% write workloads.

Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
dd_dispatch_request() into dd_finish_request() under the zone lock to
ensure full mutual exclusion between write request dispatch selection
and zone unlock on write request completion.

Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
Cc: stable@vger.kernel.org
Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index d5e21ce44d2c..69094d641062 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -376,13 +376,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
  * hardware queue, but we may return a request that is for a
  * different hardware queue. This is because mq-deadline has shared
  * state for all hardware queues, in terms of sorting, FIFOs, etc.
- *
- * For a zoned block device, __dd_dispatch_request() may return NULL
- * if all the queued write requests are directed at zones that are already
- * locked due to on-going write requests. In this case, make sure to mark
- * the queue as needing a restart to ensure that the queue is run again
- * and the pending writes dispatched once the target zones for the ongoing
- * write requests are unlocked in dd_finish_request().
  */
 static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
@@ -391,9 +384,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 
 	spin_lock(&dd->lock);
 	rq = __dd_dispatch_request(dd);
-	if (!rq && blk_queue_is_zoned(hctx->queue) &&
-	    !list_empty(&dd->fifo_list[WRITE]))
-		blk_mq_sched_mark_restart_hctx(hctx);
 	spin_unlock(&dd->lock);
 
 	return rq;
@@ -559,6 +549,13 @@ static void dd_prepare_request(struct request *rq, struct bio *bio)
  * spinlock so that the zone is never unlocked while deadline_fifo_request()
  * or deadline_next_request() are executing. This function is called for
  * all requests, whether or not these requests complete successfully.
+ *
+ * For a zoned block device, __dd_dispatch_request() may have stopped
+ * dispatching requests if all the queued requests are write requests directed
+ * at zones that are already locked due to on-going write requests. To ensure
+ * write request dispatch progress in this case, mark the queue as needing a
+ * restart to ensure that the queue is run again after completion of the
+ * request and zones being unlocked.
  */
 static void dd_finish_request(struct request *rq)
 {
@@ -570,6 +567,12 @@ static void dd_finish_request(struct request *rq)
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
 		blk_req_zone_write_unlock(rq);
+		if (!list_empty(&dd->fifo_list[WRITE])) {
+			struct blk_mq_hw_ctx *hctx;
+
+			hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
+			blk_mq_sched_mark_restart_hctx(hctx);
+		}
 		spin_unlock_irqrestore(&dd->zone_lock, flags);
 	}
 }
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] block: mq-deadline: Fix queue restart handling
  2019-10-05  3:03 [PATCH] block: mq-deadline: Fix queue restart handling Damien Le Moal
@ 2019-10-06  0:01 ` Sasha Levin
  0 siblings, 0 replies; 7+ messages in thread
From: Sasha Levin @ 2019-10-06  0:01 UTC (permalink / raw)
  To: Damien Le Moal; +Cc: stable, Greg Kroah-Hartman, Jens Axboe

On Sat, Oct 05, 2019 at 12:03:18PM +0900, Damien Le Moal wrote:
>[ Upstream commit cb8acabbe33b110157955a7425ee876fb81e6bbc ]
>
>Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
>handling") added a call to blk_mq_sched_mark_restart_hctx() in
>dd_dispatch_request() to make sure that write request dispatching does
>not stall when all target zones are locked. This fix left a subtle race
>when a write completion happens during a dispatch execution on another
>CPU:
>
>CPU 0: Dispatch			CPU1: write completion
>
>dd_dispatch_request()
>    lock(&dd->lock);
>    ...
>    lock(&dd->zone_lock);	dd_finish_request()
>    rq = find request		lock(&dd->zone_lock);
>    unlock(&dd->zone_lock);
>    				zone write unlock
>				unlock(&dd->zone_lock);
>				...
>				__blk_mq_free_request
>                                      check restart flag (not set)
>				      -> queue not run
>    ...
>    if (!rq && have writes)
>        blk_mq_sched_mark_restart_hctx()
>    unlock(&dd->lock)
>
>Since the dispatch context finishes after the write request completion
>handling, marking the queue as needing a restart is not seen from
>__blk_mq_free_request() and blk_mq_sched_restart() not executed leading
>to the dispatch stall under 100% write workloads.
>
>Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
>dd_dispatch_request() into dd_finish_request() under the zone lock to
>ensure full mutual exclusion between write request dispatch selection
>and zone unlock on write request completion.
>
>Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
>Cc: stable@vger.kernel.org
>Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
>Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
>Reviewed-by: Christoph Hellwig <hch@lst.de>
>Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
>Signed-off-by: Jens Axboe <axboe@kernel.dk>

I've queued it up for 4.19, thanks!

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] block: mq-deadline: Fix queue restart handling
  2019-08-28  4:40 Damien Le Moal
                   ` (2 preceding siblings ...)
  2019-09-03  9:05 ` Christoph Hellwig
@ 2019-09-03 14:00 ` Jens Axboe
  3 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2019-09-03 14:00 UTC (permalink / raw)
  To: Damien Le Moal, linux-block; +Cc: Hans Holmberg

On 8/27/19 10:40 PM, Damien Le Moal wrote:
> Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
> handling") added a call to blk_mq_sched_mark_restart_hctx() in
> dd_dispatch_request() to make sure that write request dispatching does
> not stall when all target zones are locked. This fix left a subtle race
> when a write completion happens during a dispatch execution on another
> CPU:
> 
> CPU 0: Dispatch			CPU1: write completion
> 
> dd_dispatch_request()
>      lock(&dd->lock);
>      ...
>      lock(&dd->zone_lock);	dd_finish_request()
>      rq = find request		lock(&dd->zone_lock);
>      unlock(&dd->zone_lock);
>      				zone write unlock
> 				unlock(&dd->zone_lock);
> 				...
> 				__blk_mq_free_request
>                                        check restart flag (not set)
> 				      -> queue not run
>      ...
>      if (!rq && have writes)
>          blk_mq_sched_mark_restart_hctx()
>      unlock(&dd->lock)
> 
> Since the dispatch context finishes after the write request completion
> handling, marking the queue as needing a restart is not seen from
> __blk_mq_free_request() and blk_mq_sched_restart() not executed leading
> to the dispatch stall under 100% write workloads.
> 
> Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
> dd_dispatch_request() into dd_finish_request() under the zone lock to
> ensure full mutual exclusion between write request dispatch selection
> and zone unlock on write request completion.

Applied, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] block: mq-deadline: Fix queue restart handling
  2019-08-28  4:40 Damien Le Moal
  2019-08-28  7:41 ` Hans Holmberg
  2019-08-28 15:04 ` Sasha Levin
@ 2019-09-03  9:05 ` Christoph Hellwig
  2019-09-03 14:00 ` Jens Axboe
  3 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2019-09-03  9:05 UTC (permalink / raw)
  To: Damien Le Moal; +Cc: linux-block, Jens Axboe, Hans Holmberg

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] block: mq-deadline: Fix queue restart handling
  2019-08-28  4:40 Damien Le Moal
  2019-08-28  7:41 ` Hans Holmberg
@ 2019-08-28 15:04 ` Sasha Levin
  2019-09-03  9:05 ` Christoph Hellwig
  2019-09-03 14:00 ` Jens Axboe
  3 siblings, 0 replies; 7+ messages in thread
From: Sasha Levin @ 2019-08-28 15:04 UTC (permalink / raw)
  To: Sasha Levin, Damien Le Moal, linux-block, Jens Axboe
  Cc: Hans Holmberg, stable, stable

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 548 bytes --]

Hi,

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag,
fixing commit: 7211aef86f79 block: mq-deadline: Fix write completion handling.

The bot has tested the following trees: v5.2.10, v4.19.68.

v5.2.10: Build OK!
v4.19.68: Build failed! Errors:
    block/mq-deadline.c:571:39: error: ‘struct request’ has no member named ‘mq_hctx’; did you mean ‘mq_ctx’?


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

--
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] block: mq-deadline: Fix queue restart handling
  2019-08-28  4:40 Damien Le Moal
@ 2019-08-28  7:41 ` Hans Holmberg
  2019-08-28 15:04 ` Sasha Levin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Hans Holmberg @ 2019-08-28  7:41 UTC (permalink / raw)
  To: Damien Le Moal, linux-block, Jens Axboe

On 2019-08-28 06:40, Damien Le Moal wrote:
> Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
> handling") added a call to blk_mq_sched_mark_restart_hctx() in
> dd_dispatch_request() to make sure that write request dispatching does
> not stall when all target zones are locked. This fix left a subtle race
> when a write completion happens during a dispatch execution on another
> CPU:
> 
> CPU 0: Dispatch			CPU1: write completion
> 
> dd_dispatch_request()
>      lock(&dd->lock);
>      ...
>      lock(&dd->zone_lock);	dd_finish_request()
>      rq = find request		lock(&dd->zone_lock);
>      unlock(&dd->zone_lock);
>      				zone write unlock
> 				unlock(&dd->zone_lock);
> 				...
> 				__blk_mq_free_request
>                                        check restart flag (not set)
> 				      -> queue not run
>      ...
>      if (!rq && have writes)
>          blk_mq_sched_mark_restart_hctx()
>      unlock(&dd->lock)
> 
> Since the dispatch context finishes after the write request completion
> handling, marking the queue as needing a restart is not seen from
> __blk_mq_free_request() and blk_mq_sched_restart() not executed leading
> to the dispatch stall under 100% write workloads.
> 
> Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
> dd_dispatch_request() into dd_finish_request() under the zone lock to
> ensure full mutual exclusion between write request dispatch selection
> and zone unlock on write request completion.
> 
> Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
> Cc: stable@vger.kernel.org
> Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>   block/mq-deadline.c | 19 +++++++++----------
>   1 file changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index a17466f310f4..b490f47fd553 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -377,13 +377,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
>    * hardware queue, but we may return a request that is for a
>    * different hardware queue. This is because mq-deadline has shared
>    * state for all hardware queues, in terms of sorting, FIFOs, etc.
> - *
> - * For a zoned block device, __dd_dispatch_request() may return NULL
> - * if all the queued write requests are directed at zones that are already
> - * locked due to on-going write requests. In this case, make sure to mark
> - * the queue as needing a restart to ensure that the queue is run again
> - * and the pending writes dispatched once the target zones for the ongoing
> - * write requests are unlocked in dd_finish_request().
>    */
>   static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
>   {
> @@ -392,9 +385,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
>   
>   	spin_lock(&dd->lock);
>   	rq = __dd_dispatch_request(dd);
> -	if (!rq && blk_queue_is_zoned(hctx->queue) &&
> -	    !list_empty(&dd->fifo_list[WRITE]))
> -		blk_mq_sched_mark_restart_hctx(hctx);
>   	spin_unlock(&dd->lock);
>   
>   	return rq;
> @@ -561,6 +551,13 @@ static void dd_prepare_request(struct request *rq, struct bio *bio)
>    * spinlock so that the zone is never unlocked while deadline_fifo_request()
>    * or deadline_next_request() are executing. This function is called for
>    * all requests, whether or not these requests complete successfully.
> + *
> + * For a zoned block device, __dd_dispatch_request() may have stopped
> + * dispatching requests if all the queued requests are write requests directed
> + * at zones that are already locked due to on-going write requests. To ensure
> + * write request dispatch progress in this case, mark the queue as needing a
> + * restart to ensure that the queue is run again after completion of the
> + * request and zones being unlocked.
>    */
>   static void dd_finish_request(struct request *rq)
>   {
> @@ -572,6 +569,8 @@ static void dd_finish_request(struct request *rq)
>   
>   		spin_lock_irqsave(&dd->zone_lock, flags);
>   		blk_req_zone_write_unlock(rq);
> +		if (!list_empty(&dd->fifo_list[WRITE]))
> +			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
>   		spin_unlock_irqrestore(&dd->zone_lock, flags);
>   	}
>   }
> 

Looks good to me.

Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] block: mq-deadline: Fix queue restart handling
@ 2019-08-28  4:40 Damien Le Moal
  2019-08-28  7:41 ` Hans Holmberg
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Damien Le Moal @ 2019-08-28  4:40 UTC (permalink / raw)
  To: linux-block, Jens Axboe; +Cc: Hans Holmberg

Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
handling") added a call to blk_mq_sched_mark_restart_hctx() in
dd_dispatch_request() to make sure that write request dispatching does
not stall when all target zones are locked. This fix left a subtle race
when a write completion happens during a dispatch execution on another
CPU:

CPU 0: Dispatch			CPU1: write completion

dd_dispatch_request()
    lock(&dd->lock);
    ...
    lock(&dd->zone_lock);	dd_finish_request()
    rq = find request		lock(&dd->zone_lock);
    unlock(&dd->zone_lock);
    				zone write unlock
				unlock(&dd->zone_lock);
				...
				__blk_mq_free_request
                                      check restart flag (not set)
				      -> queue not run
    ...
    if (!rq && have writes)
        blk_mq_sched_mark_restart_hctx()
    unlock(&dd->lock)

Since the dispatch context finishes after the write request completion
handling, marking the queue as needing a restart is not seen from
__blk_mq_free_request() and blk_mq_sched_restart() not executed leading
to the dispatch stall under 100% write workloads.

Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
dd_dispatch_request() into dd_finish_request() under the zone lock to
ensure full mutual exclusion between write request dispatch selection
and zone unlock on write request completion.

Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
Cc: stable@vger.kernel.org
Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 block/mq-deadline.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index a17466f310f4..b490f47fd553 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -377,13 +377,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
  * hardware queue, but we may return a request that is for a
  * different hardware queue. This is because mq-deadline has shared
  * state for all hardware queues, in terms of sorting, FIFOs, etc.
- *
- * For a zoned block device, __dd_dispatch_request() may return NULL
- * if all the queued write requests are directed at zones that are already
- * locked due to on-going write requests. In this case, make sure to mark
- * the queue as needing a restart to ensure that the queue is run again
- * and the pending writes dispatched once the target zones for the ongoing
- * write requests are unlocked in dd_finish_request().
  */
 static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
@@ -392,9 +385,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 
 	spin_lock(&dd->lock);
 	rq = __dd_dispatch_request(dd);
-	if (!rq && blk_queue_is_zoned(hctx->queue) &&
-	    !list_empty(&dd->fifo_list[WRITE]))
-		blk_mq_sched_mark_restart_hctx(hctx);
 	spin_unlock(&dd->lock);
 
 	return rq;
@@ -561,6 +551,13 @@ static void dd_prepare_request(struct request *rq, struct bio *bio)
  * spinlock so that the zone is never unlocked while deadline_fifo_request()
  * or deadline_next_request() are executing. This function is called for
  * all requests, whether or not these requests complete successfully.
+ *
+ * For a zoned block device, __dd_dispatch_request() may have stopped
+ * dispatching requests if all the queued requests are write requests directed
+ * at zones that are already locked due to on-going write requests. To ensure
+ * write request dispatch progress in this case, mark the queue as needing a
+ * restart to ensure that the queue is run again after completion of the
+ * request and zones being unlocked.
  */
 static void dd_finish_request(struct request *rq)
 {
@@ -572,6 +569,8 @@ static void dd_finish_request(struct request *rq)
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
 		blk_req_zone_write_unlock(rq);
+		if (!list_empty(&dd->fifo_list[WRITE]))
+			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
 		spin_unlock_irqrestore(&dd->zone_lock, flags);
 	}
 }
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-10-06  0:01 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-05  3:03 [PATCH] block: mq-deadline: Fix queue restart handling Damien Le Moal
2019-10-06  0:01 ` Sasha Levin
  -- strict thread matches above, loose matches on Subject: below --
2019-08-28  4:40 Damien Le Moal
2019-08-28  7:41 ` Hans Holmberg
2019-08-28 15:04 ` Sasha Levin
2019-09-03  9:05 ` Christoph Hellwig
2019-09-03 14:00 ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.