All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] blk-mq: unify hardware queue run handlers
@ 2017-04-10 15:54 Jens Axboe
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 15:54 UTC (permalink / raw)
  To: linux-block; +Cc: osandov, hch, bart.vanassche

We currently have three of them, one normal work queue item, and two
delayed work queue items. The two delayed items differ in that one of
them only runs the queue it was previously stopped, that's it. The
non-delayed one is identical to the non stopped checking delayed
variant.

Sending this out for early review, as I'll be heading on vacation
shortly. This is untested, just compiled.

This shrinks the size of a hardware queue from 832 bytes (13 cachelines)
to 704 bytes (11 cachelines) on my setup. That's quite a substantial
win.

Patches are against my 4.12 branch.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-10 15:54 [PATCH 0/3] blk-mq: unify hardware queue run handlers Jens Axboe
@ 2017-04-10 15:54 ` Jens Axboe
  2017-04-10 16:09   ` Christoph Hellwig
                     ` (3 more replies)
  2017-04-10 15:54 ` [PATCH 2/3] block: add kblock_mod_delayed_work_on() Jens Axboe
  2017-04-10 15:54 ` [PATCH 3/3] blk-mq: unify hctx delay_work and run_work Jens Axboe
  2 siblings, 4 replies; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 15:54 UTC (permalink / raw)
  To: linux-block; +Cc: osandov, hch, bart.vanassche, Jens Axboe

They serve the exact same purpose. Get rid of the non-delayed
work variant, and just run it without delay for the normal case.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  2 +-
 block/blk-mq.c         | 27 ++++++---------------------
 include/linux/blk-mq.h |  3 +--
 3 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 8654aa0cef6d..d58541e4dc7b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -269,7 +269,7 @@ void blk_sync_queue(struct request_queue *q)
 		int i;
 
 		queue_for_each_hw_ctx(q, hctx, i) {
-			cancel_work_sync(&hctx->run_work);
+			cancel_delayed_work_sync(&hctx->run_work);
 			cancel_delayed_work_sync(&hctx->delay_work);
 		}
 	} else {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2ef7b460924..7afba6ab5a96 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1168,13 +1168,9 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 		put_cpu();
 	}
 
-	if (msecs == 0)
-		kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx),
-					 &hctx->run_work);
-	else
-		kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-						 &hctx->delayed_run_work,
-						 msecs_to_jiffies(msecs));
+	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+					 &hctx->run_work,
+					 msecs_to_jiffies(msecs));
 }
 
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
@@ -1226,7 +1222,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	cancel_work(&hctx->run_work);
+	cancel_delayed_work(&hctx->run_work);
 	cancel_delayed_work(&hctx->delay_work);
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
@@ -1284,17 +1280,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 {
 	struct blk_mq_hw_ctx *hctx;
 
-	hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
-
-	__blk_mq_run_hw_queue(hctx);
-}
-
-static void blk_mq_delayed_run_work_fn(struct work_struct *work)
-{
-	struct blk_mq_hw_ctx *hctx;
-
-	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work);
-
+	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
 	__blk_mq_run_hw_queue(hctx);
 }
 
@@ -1899,8 +1885,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	if (node == NUMA_NO_NODE)
 		node = hctx->numa_node = set->numa_node;
 
-	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
-	INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn);
+	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
 	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d75de612845d..2b4573a9ccf4 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -15,7 +15,7 @@ struct blk_mq_hw_ctx {
 		unsigned long		state;		/* BLK_MQ_S_* flags */
 	} ____cacheline_aligned_in_smp;
 
-	struct work_struct	run_work;
+	struct delayed_work	run_work;
 	cpumask_var_t		cpumask;
 	int			next_cpu;
 	int			next_cpu_batch;
@@ -51,7 +51,6 @@ struct blk_mq_hw_ctx {
 
 	atomic_t		nr_active;
 
-	struct delayed_work	delayed_run_work;
 	struct delayed_work	delay_work;
 
 	struct hlist_node	cpuhp_dead;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] block: add kblock_mod_delayed_work_on()
  2017-04-10 15:54 [PATCH 0/3] blk-mq: unify hardware queue run handlers Jens Axboe
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
@ 2017-04-10 15:54 ` Jens Axboe
  2017-04-10 16:10   ` Christoph Hellwig
  2017-04-10 16:17   ` Bart Van Assche
  2017-04-10 15:54 ` [PATCH 3/3] blk-mq: unify hctx delay_work and run_work Jens Axboe
  2 siblings, 2 replies; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 15:54 UTC (permalink / raw)
  To: linux-block; +Cc: osandov, hch, bart.vanassche, Jens Axboe

This modifies (or adds, if not currently pending) an existing
delayed work item.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 7 +++++++
 include/linux/blkdev.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index d58541e4dc7b..bffb8640346b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3135,6 +3135,13 @@ int kblockd_schedule_work_on(int cpu, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work_on);
 
+int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
+				unsigned long delay)
+{
+	return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
+
 int kblockd_schedule_delayed_work(struct delayed_work *dwork,
 				  unsigned long delay)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ec993573e0a8..71b978dedbbc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1699,6 +1699,7 @@ int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
 int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
+int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] blk-mq: unify hctx delay_work and run_work
  2017-04-10 15:54 [PATCH 0/3] blk-mq: unify hardware queue run handlers Jens Axboe
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
  2017-04-10 15:54 ` [PATCH 2/3] block: add kblock_mod_delayed_work_on() Jens Axboe
@ 2017-04-10 15:54 ` Jens Axboe
  2017-04-10 16:21   ` Bart Van Assche
  2 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 15:54 UTC (permalink / raw)
  To: linux-block; +Cc: osandov, hch, bart.vanassche, Jens Axboe

The only difference between ->run_work and ->delay_work, is that
the latter is used to defer running a queue. This is done by
marking the queue stopped, and scheduling ->delay_work to run
sometime in the future. While the queue is stopped, direct runs
or runs through ->run_work will not run the queue.

If we combine the handlers, then we need to handle two things:

1) If a delayed/stopped run is scheduled, then we should not run
   the queue before that has been completed.
2) If a queue is delayed/stopped, the handler needs to restart
   the queue. Normally a run of a queue with the stopped bit set
   would be a no-op.

Case 1 is handled by modifying a currently pending queue run
to the deadline set by the caller of blk_mq_delay_queue().
Subsequent attempts to queue a queue run will find the work
item already pending, and direct runs will see a stopped queue
as before.

Case 2 is handled by adding a new bit, BLK_MQ_S_START_ON_RUN,
that tells the work handler that it should clear a stopped
queue and run the handler.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  4 +---
 block/blk-mq.c         | 34 ++++++++++++++++++++++------------
 include/linux/blk-mq.h |  3 +--
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index bffb8640346b..4f0104afa848 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -268,10 +268,8 @@ void blk_sync_queue(struct request_queue *q)
 		struct blk_mq_hw_ctx *hctx;
 		int i;
 
-		queue_for_each_hw_ctx(q, hctx, i) {
+		queue_for_each_hw_ctx(q, hctx, i)
 			cancel_delayed_work_sync(&hctx->run_work);
-			cancel_delayed_work_sync(&hctx->delay_work);
-		}
 	} else {
 		cancel_delayed_work_sync(&q->delay_work);
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7afba6ab5a96..e97ed8e7f359 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1223,7 +1223,6 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	cancel_delayed_work(&hctx->run_work);
-	cancel_delayed_work(&hctx->delay_work);
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
@@ -1281,27 +1280,39 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
-	__blk_mq_run_hw_queue(hctx);
-}
 
-static void blk_mq_delay_work_fn(struct work_struct *work)
-{
-	struct blk_mq_hw_ctx *hctx;
+	/*
+	 * If we are stopped, don't run the queue. The exception is if
+	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
+	 * the STOPPED bit and run it.
+	 */
+	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
+		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
+			return;
 
-	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
+		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+	}
 
-	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
-		__blk_mq_run_hw_queue(hctx);
+	__blk_mq_run_hw_queue(hctx);
 }
 
+
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
 	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
 		return;
 
+	/*
+	 * Stop the hw queue, then modify currently delayed work.
+	 * This should prevent us from running the queue prematurely.
+	 * Mark the queue as auto-clearing STOPPED when it runs.
+	 */
 	blk_mq_stop_hw_queue(hctx);
-	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-			&hctx->delay_work, msecs_to_jiffies(msecs));
+	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+					&hctx->run_work,
+					msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
@@ -1886,7 +1897,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		node = hctx->numa_node = set->numa_node;
 
 	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
-	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2b4573a9ccf4..7a114b7b943c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,8 +51,6 @@ struct blk_mq_hw_ctx {
 
 	atomic_t		nr_active;
 
-	struct delayed_work	delay_work;
-
 	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;
 
@@ -160,6 +158,7 @@ enum {
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 	BLK_MQ_S_SCHED_RESTART	= 2,
 	BLK_MQ_S_TAG_WAITING	= 3,
+	BLK_MQ_S_START_ON_RUN	= 4,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
@ 2017-04-10 16:09   ` Christoph Hellwig
  2017-04-10 16:17   ` Bart Van Assche
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2017-04-10 16:09 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, osandov, hch, bart.vanassche

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] block: add kblock_mod_delayed_work_on()
  2017-04-10 15:54 ` [PATCH 2/3] block: add kblock_mod_delayed_work_on() Jens Axboe
@ 2017-04-10 16:10   ` Christoph Hellwig
  2017-04-10 16:17   ` Bart Van Assche
  1 sibling, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2017-04-10 16:10 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, osandov, hch, bart.vanassche

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
  2017-04-10 16:09   ` Christoph Hellwig
@ 2017-04-10 16:17   ` Bart Van Assche
  2017-04-11 18:00   ` Bart Van Assche
  2017-04-28  2:12   ` Ming Lei
  3 siblings, 0 replies; 15+ messages in thread
From: Bart Van Assche @ 2017-04-10 16:17 UTC (permalink / raw)
  To: linux-block, axboe; +Cc: hch, osandov

On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
> They serve the exact same purpose. Get rid of the non-delayed
> work variant, and just run it without delay for the normal case.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>=

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] block: add kblock_mod_delayed_work_on()
  2017-04-10 15:54 ` [PATCH 2/3] block: add kblock_mod_delayed_work_on() Jens Axboe
  2017-04-10 16:10   ` Christoph Hellwig
@ 2017-04-10 16:17   ` Bart Van Assche
  1 sibling, 0 replies; 15+ messages in thread
From: Bart Van Assche @ 2017-04-10 16:17 UTC (permalink / raw)
  To: linux-block, axboe; +Cc: hch, osandov

On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
> This modifies (or adds, if not currently pending) an existing
> delayed work item.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>=

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] blk-mq: unify hctx delay_work and run_work
  2017-04-10 15:54 ` [PATCH 3/3] blk-mq: unify hctx delay_work and run_work Jens Axboe
@ 2017-04-10 16:21   ` Bart Van Assche
  2017-04-10 16:53     ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2017-04-10 16:21 UTC (permalink / raw)
  To: linux-block, axboe; +Cc: hch, osandov

On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
> @@ -1281,27 +1280,39 @@ static void blk_mq_run_work_fn(struct work_struct=
 *work)
>  	struct blk_mq_hw_ctx *hctx;
> =20
>  	hctx =3D container_of(work, struct blk_mq_hw_ctx, run_work.work);
> -	__blk_mq_run_hw_queue(hctx);
> -}
> =20
> -static void blk_mq_delay_work_fn(struct work_struct *work)
> -{
> -	struct blk_mq_hw_ctx *hctx;
> +	/*
> +	 * If we are stopped, don't run the queue. The exception is if
> +	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
> +	 * the STOPPED bit and run it.
> +	 */
> +	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
> +		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
> +			return;
> =20
> -	hctx =3D container_of(work, struct blk_mq_hw_ctx, delay_work.work);
> +		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
> +		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
> +	}
> =20
> -	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
> -		__blk_mq_run_hw_queue(hctx);
> +	__blk_mq_run_hw_queue(hctx);
>  }
> =20
> +
>  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
>  {
>  	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
>  		return;
> =20
> +	/*
> +	 * Stop the hw queue, then modify currently delayed work.
> +	 * This should prevent us from running the queue prematurely.
> +	 * Mark the queue as auto-clearing STOPPED when it runs.
> +	 */
>  	blk_mq_stop_hw_queue(hctx);
> -	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
> -			&hctx->delay_work, msecs_to_jiffies(msecs));
> +	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
> +	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
> +					&hctx->run_work,
> +					msecs_to_jiffies(msecs));
>  }
>  EXPORT_SYMBOL(blk_mq_delay_queue);

Hello Jens,

Is it possible for a block driver to call blk_mq_delay_queue() while
blk_mq_delay_work_fn() is running? Can that result in BLK_MQ_S_STOPPED
and=A0BLK_MQ_S_START_ON_RUN being checked by=A0blk_mq_delay_work_fn() after
blk_mq_delay_queue() has set BLK_MQ_S_STOPPED and before it has set
BLK_MQ_S_START_ON_RUN?

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] blk-mq: unify hctx delay_work and run_work
  2017-04-10 16:21   ` Bart Van Assche
@ 2017-04-10 16:53     ` Jens Axboe
  2017-04-10 17:23       ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 16:53 UTC (permalink / raw)
  To: Bart Van Assche, linux-block; +Cc: hch, osandov

On 04/10/2017 10:21 AM, Bart Van Assche wrote:
> On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
>> @@ -1281,27 +1280,39 @@ static void blk_mq_run_work_fn(struct work_struct *work)
>>  	struct blk_mq_hw_ctx *hctx;
>>  
>>  	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
>> -	__blk_mq_run_hw_queue(hctx);
>> -}
>>  
>> -static void blk_mq_delay_work_fn(struct work_struct *work)
>> -{
>> -	struct blk_mq_hw_ctx *hctx;
>> +	/*
>> +	 * If we are stopped, don't run the queue. The exception is if
>> +	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
>> +	 * the STOPPED bit and run it.
>> +	 */
>> +	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
>> +		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
>> +			return;
>>  
>> -	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
>> +		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
>> +		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
>> +	}
>>  
>> -	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
>> -		__blk_mq_run_hw_queue(hctx);
>> +	__blk_mq_run_hw_queue(hctx);
>>  }
>>  
>> +
>>  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
>>  {
>>  	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
>>  		return;
>>  
>> +	/*
>> +	 * Stop the hw queue, then modify currently delayed work.
>> +	 * This should prevent us from running the queue prematurely.
>> +	 * Mark the queue as auto-clearing STOPPED when it runs.
>> +	 */
>>  	blk_mq_stop_hw_queue(hctx);
>> -	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
>> -			&hctx->delay_work, msecs_to_jiffies(msecs));
>> +	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
>> +	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
>> +					&hctx->run_work,
>> +					msecs_to_jiffies(msecs));
>>  }
>>  EXPORT_SYMBOL(blk_mq_delay_queue);
> 
> Hello Jens,
> 
> Is it possible for a block driver to call blk_mq_delay_queue() while
> blk_mq_delay_work_fn() is running? Can that result in BLK_MQ_S_STOPPED
> and BLK_MQ_S_START_ON_RUN being checked by blk_mq_delay_work_fn() after
> blk_mq_delay_queue() has set BLK_MQ_S_STOPPED and before it has set
> BLK_MQ_S_START_ON_RUN?

Yeah, I don't think it's bullet proof. I just looked at the in-kernel
users, and there's just one, nvme/fc.c. And it looks really buggy,
since it manually stops _all_ queues, then delays the one hw queue.
So we'll end up with potentially a bunch of stopped queues, and only
one getting restarted.

I think for blk_mq_delay_queue(), what we really care about is "this
queue has to run again sometime in the future". If that happens
much sooner than asked for, that's OK, the caller will just delay
again if it needs it. For most cases, we'll be close.

Obviously we can't have ordering issues and end up in a bad state,
we have to prevent that.

I'll fiddle with this a bit more.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] blk-mq: unify hctx delay_work and run_work
  2017-04-10 16:53     ` Jens Axboe
@ 2017-04-10 17:23       ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2017-04-10 17:23 UTC (permalink / raw)
  To: Bart Van Assche, linux-block; +Cc: hch, osandov

On 04/10/2017 10:53 AM, Jens Axboe wrote:
> On 04/10/2017 10:21 AM, Bart Van Assche wrote:
>> On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
>>> @@ -1281,27 +1280,39 @@ static void blk_mq_run_work_fn(struct work_struct *work)
>>>  	struct blk_mq_hw_ctx *hctx;
>>>  
>>>  	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
>>> -	__blk_mq_run_hw_queue(hctx);
>>> -}
>>>  
>>> -static void blk_mq_delay_work_fn(struct work_struct *work)
>>> -{
>>> -	struct blk_mq_hw_ctx *hctx;
>>> +	/*
>>> +	 * If we are stopped, don't run the queue. The exception is if
>>> +	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
>>> +	 * the STOPPED bit and run it.
>>> +	 */
>>> +	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
>>> +		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
>>> +			return;
>>>  
>>> -	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
>>> +		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
>>> +		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
>>> +	}
>>>  
>>> -	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
>>> -		__blk_mq_run_hw_queue(hctx);
>>> +	__blk_mq_run_hw_queue(hctx);
>>>  }
>>>  
>>> +
>>>  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
>>>  {
>>>  	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
>>>  		return;
>>>  
>>> +	/*
>>> +	 * Stop the hw queue, then modify currently delayed work.
>>> +	 * This should prevent us from running the queue prematurely.
>>> +	 * Mark the queue as auto-clearing STOPPED when it runs.
>>> +	 */
>>>  	blk_mq_stop_hw_queue(hctx);
>>> -	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
>>> -			&hctx->delay_work, msecs_to_jiffies(msecs));
>>> +	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
>>> +	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
>>> +					&hctx->run_work,
>>> +					msecs_to_jiffies(msecs));
>>>  }
>>>  EXPORT_SYMBOL(blk_mq_delay_queue);
>>
>> Hello Jens,
>>
>> Is it possible for a block driver to call blk_mq_delay_queue() while
>> blk_mq_delay_work_fn() is running? Can that result in BLK_MQ_S_STOPPED
>> and BLK_MQ_S_START_ON_RUN being checked by blk_mq_delay_work_fn() after
>> blk_mq_delay_queue() has set BLK_MQ_S_STOPPED and before it has set
>> BLK_MQ_S_START_ON_RUN?
> 
> Yeah, I don't think it's bullet proof. I just looked at the in-kernel
> users, and there's just one, nvme/fc.c. And it looks really buggy,
> since it manually stops _all_ queues, then delays the one hw queue.
> So we'll end up with potentially a bunch of stopped queues, and only
> one getting restarted.
> 
> I think for blk_mq_delay_queue(), what we really care about is "this
> queue has to run again sometime in the future". If that happens
> much sooner than asked for, that's OK, the caller will just delay
> again if it needs it. For most cases, we'll be close.
> 
> Obviously we can't have ordering issues and end up in a bad state,
> we have to prevent that.
> 
> I'll fiddle with this a bit more.

Spent a bit of time looking at the workqueue code. Looks like we're
guaranteed that we'll have at least one run of the handler after
calling kblockd_mod_delayed_work_on(). If the handler is currently
running, the we will sucessfully queue a new invocation. That's the
troublesome case. If it's not currently running, we just push the run
sometime into the future. In both cases, we know it'll run _after_
setting BLK_MQ_S_START_ON_RUN, which is the important part.

Hence I think the patch is fine as-is. Let me know if you disagree!

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
  2017-04-10 16:09   ` Christoph Hellwig
  2017-04-10 16:17   ` Bart Van Assche
@ 2017-04-11 18:00   ` Bart Van Assche
  2017-04-14 20:02     ` Jens Axboe
  2017-04-28  2:12   ` Ming Lei
  3 siblings, 1 reply; 15+ messages in thread
From: Bart Van Assche @ 2017-04-11 18:00 UTC (permalink / raw)
  To: linux-block, axboe; +Cc: hch, osandov

On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
>  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
>  {
> -	cancel_work(&hctx->run_work);
> +	cancel_delayed_work(&hctx->run_work);
>  	cancel_delayed_work(&hctx->delay_work);
>  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
>  }

Hello Jens,

I would like to change the above cancel_*work() calls into cancel_*work_syn=
c()
calls because this code is used when e.g. switching between I/O schedulers =
and
no .queue_rq() calls must be ongoing while switching between schedulers. Do=
 you
want to integrate that change into this patch or do you want me to post a
separate patch? In the latter case, should I start from your for-next branc=
h
to develop that patch or from your for-next branch + this patch series?

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-11 18:00   ` Bart Van Assche
@ 2017-04-14 20:02     ` Jens Axboe
  2017-04-14 20:56       ` Bart Van Assche
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2017-04-14 20:02 UTC (permalink / raw)
  To: Bart Van Assche, linux-block; +Cc: hch, osandov

On 04/11/2017 12:00 PM, Bart Van Assche wrote:
> On Mon, 2017-04-10 at 09:54 -0600, Jens Axboe wrote:
>>  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
>>  {
>> -	cancel_work(&hctx->run_work);
>> +	cancel_delayed_work(&hctx->run_work);
>>  	cancel_delayed_work(&hctx->delay_work);
>>  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
>>  }
> 
> Hello Jens,
> 
> I would like to change the above cancel_*work() calls into cancel_*work_sync()
> calls because this code is used when e.g. switching between I/O schedulers and
> no .queue_rq() calls must be ongoing while switching between schedulers. Do you
> want to integrate that change into this patch or do you want me to post a
> separate patch? In the latter case, should I start from your for-next branch
> to develop that patch or from your for-next branch + this patch series?

I agree, we should make it _sync(). I'll just make the edit in the patch
when I send it out again. I was waiting for further comments on patch 3/3.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-14 20:02     ` Jens Axboe
@ 2017-04-14 20:56       ` Bart Van Assche
  0 siblings, 0 replies; 15+ messages in thread
From: Bart Van Assche @ 2017-04-14 20:56 UTC (permalink / raw)
  To: linux-block, axboe; +Cc: hch, osandov

On Fri, 2017-04-14 at 14:02 -0600, Jens Axboe wrote:
> I was waiting for further comments on patch 3/3.

Hello Jens,

Patch 3/3 is probably fine but I hope that you understand that the introduc=
tion
of a new race condition does not make me enthusiast. Should your explanatio=
n of
why that race is harmless perhaps be added as a comment?

Bart.=

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work
  2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
                     ` (2 preceding siblings ...)
  2017-04-11 18:00   ` Bart Van Assche
@ 2017-04-28  2:12   ` Ming Lei
  3 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2017-04-28  2:12 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, osandov, hch, bart.vanassche

On Mon, Apr 10, 2017 at 09:54:54AM -0600, Jens Axboe wrote:
> They serve the exact same purpose. Get rid of the non-delayed
> work variant, and just run it without delay for the normal case.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/blk-core.c       |  2 +-
>  block/blk-mq.c         | 27 ++++++---------------------
>  include/linux/blk-mq.h |  3 +--
>  3 files changed, 8 insertions(+), 24 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 8654aa0cef6d..d58541e4dc7b 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -269,7 +269,7 @@ void blk_sync_queue(struct request_queue *q)
>  		int i;
>  
>  		queue_for_each_hw_ctx(q, hctx, i) {
> -			cancel_work_sync(&hctx->run_work);
> +			cancel_delayed_work_sync(&hctx->run_work);
>  			cancel_delayed_work_sync(&hctx->delay_work);
>  		}
>  	} else {
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e2ef7b460924..7afba6ab5a96 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1168,13 +1168,9 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
>  		put_cpu();
>  	}
>  
> -	if (msecs == 0)
> -		kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx),
> -					 &hctx->run_work);
> -	else
> -		kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
> -						 &hctx->delayed_run_work,
> -						 msecs_to_jiffies(msecs));
> +	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
> +					 &hctx->run_work,
> +					 msecs_to_jiffies(msecs));
>  }
>  
>  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
> @@ -1226,7 +1222,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
>  
>  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
>  {
> -	cancel_work(&hctx->run_work);
> +	cancel_delayed_work(&hctx->run_work);
>  	cancel_delayed_work(&hctx->delay_work);
>  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
>  }
> @@ -1284,17 +1280,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
>  {
>  	struct blk_mq_hw_ctx *hctx;
>  
> -	hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
> -
> -	__blk_mq_run_hw_queue(hctx);
> -}
> -
> -static void blk_mq_delayed_run_work_fn(struct work_struct *work)
> -{
> -	struct blk_mq_hw_ctx *hctx;
> -
> -	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work);
> -
> +	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
>  	__blk_mq_run_hw_queue(hctx);
>  }
>  
> @@ -1899,8 +1885,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
>  	if (node == NUMA_NO_NODE)
>  		node = hctx->numa_node = set->numa_node;
>  
> -	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
> -	INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn);
> +	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
>  	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
>  	spin_lock_init(&hctx->lock);
>  	INIT_LIST_HEAD(&hctx->dispatch);
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index d75de612845d..2b4573a9ccf4 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -15,7 +15,7 @@ struct blk_mq_hw_ctx {
>  		unsigned long		state;		/* BLK_MQ_S_* flags */
>  	} ____cacheline_aligned_in_smp;
>  
> -	struct work_struct	run_work;
> +	struct delayed_work	run_work;
>  	cpumask_var_t		cpumask;
>  	int			next_cpu;
>  	int			next_cpu_batch;
> @@ -51,7 +51,6 @@ struct blk_mq_hw_ctx {
>  
>  	atomic_t		nr_active;
>  
> -	struct delayed_work	delayed_run_work;
>  	struct delayed_work	delay_work;
>  
>  	struct hlist_node	cpuhp_dead;
> -- 
> 2.7.4

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,
Ming

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-04-28  2:12 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-10 15:54 [PATCH 0/3] blk-mq: unify hardware queue run handlers Jens Axboe
2017-04-10 15:54 ` [PATCH 1/3] blk-mq: unify hctx delayed_run_work and run_work Jens Axboe
2017-04-10 16:09   ` Christoph Hellwig
2017-04-10 16:17   ` Bart Van Assche
2017-04-11 18:00   ` Bart Van Assche
2017-04-14 20:02     ` Jens Axboe
2017-04-14 20:56       ` Bart Van Assche
2017-04-28  2:12   ` Ming Lei
2017-04-10 15:54 ` [PATCH 2/3] block: add kblock_mod_delayed_work_on() Jens Axboe
2017-04-10 16:10   ` Christoph Hellwig
2017-04-10 16:17   ` Bart Van Assche
2017-04-10 15:54 ` [PATCH 3/3] blk-mq: unify hctx delay_work and run_work Jens Axboe
2017-04-10 16:21   ` Bart Van Assche
2017-04-10 16:53     ` Jens Axboe
2017-04-10 17:23       ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.