All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 19:29 ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-10-12 19:29 UTC (permalink / raw)
  To: axboe; +Cc: hare, linux-scsi, linux-nvme

For some pending NVMe work I'd really love to be able to get my timeouts
from process context.  So far it seems only SCSI and NVMe use the blk-mq
timeout handler, and both don't seem to be particularly excited about
being called from timer context.  Does anyone have an objection against
the patch below that switches it to use a delayed work item?  I could
make use of this quickly for NVMe, but for SCSI we still have to deal
with the old request code which can't be switched to a delayed work
as easily.
---
 block/blk-mq.c         | 11 ++++++-----
 include/linux/blkdev.h |  5 ++++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d921cd5..a7ae387 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -635,9 +635,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	}
 }
 
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_rq_timer_work(struct work_struct *work)
 {
-	struct request_queue *q = (struct request_queue *)priv;
+	struct request_queue *q =
+		container_of(work, struct request_queue, timeout_work.work);
 	struct blk_mq_timeout_data data = {
 		.next		= 0,
 		.next_set	= 0,
@@ -648,7 +649,7 @@ static void blk_mq_rq_timer(unsigned long priv)
 
 	if (data.next_set) {
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
-		mod_timer(&q->timeout, data.next);
+		mod_delayed_work(system_wq, &q->timeout_work, data.next);
 	} else {
 		struct blk_mq_hw_ctx *hctx;
 
@@ -2008,7 +2009,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
 		goto err_hctxs;
 
-	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+	INIT_DELAYED_WORK(&q->timeout_work, blk_mq_rq_timer_work);
 	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
 	q->nr_queues = nr_cpu_ids;
@@ -2173,7 +2174,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 		 * timeout handler can't touch hw queue during the
 		 * reinitialization
 		 */
-		del_timer_sync(&q->timeout);
+		cancel_delayed_work_sync(&q->timeout_work);
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 19c2e94..ecce48f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -401,7 +401,10 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
-	struct timer_list	timeout;
+	union {
+		struct timer_list	timeout;	/* legacy */
+		struct delayed_work	timeout_work;	/* blk-mq */
+	};
 	struct list_head	timeout_list;
 
 	struct list_head	icq_list;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 19:29 ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-10-12 19:29 UTC (permalink / raw)


For some pending NVMe work I'd really love to be able to get my timeouts
from process context.  So far it seems only SCSI and NVMe use the blk-mq
timeout handler, and both don't seem to be particularly excited about
being called from timer context.  Does anyone have an objection against
the patch below that switches it to use a delayed work item?  I could
make use of this quickly for NVMe, but for SCSI we still have to deal
with the old request code which can't be switched to a delayed work
as easily.
---
 block/blk-mq.c         | 11 ++++++-----
 include/linux/blkdev.h |  5 ++++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d921cd5..a7ae387 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -635,9 +635,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	}
 }
 
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_rq_timer_work(struct work_struct *work)
 {
-	struct request_queue *q = (struct request_queue *)priv;
+	struct request_queue *q =
+		container_of(work, struct request_queue, timeout_work.work);
 	struct blk_mq_timeout_data data = {
 		.next		= 0,
 		.next_set	= 0,
@@ -648,7 +649,7 @@ static void blk_mq_rq_timer(unsigned long priv)
 
 	if (data.next_set) {
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
-		mod_timer(&q->timeout, data.next);
+		mod_delayed_work(system_wq, &q->timeout_work, data.next);
 	} else {
 		struct blk_mq_hw_ctx *hctx;
 
@@ -2008,7 +2009,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
 		goto err_hctxs;
 
-	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+	INIT_DELAYED_WORK(&q->timeout_work, blk_mq_rq_timer_work);
 	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
 	q->nr_queues = nr_cpu_ids;
@@ -2173,7 +2174,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 		 * timeout handler can't touch hw queue during the
 		 * reinitialization
 		 */
-		del_timer_sync(&q->timeout);
+		cancel_delayed_work_sync(&q->timeout_work);
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 19c2e94..ecce48f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -401,7 +401,10 @@ struct request_queue {
 	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
-	struct timer_list	timeout;
+	union {
+		struct timer_list	timeout;	/* legacy */
+		struct delayed_work	timeout_work;	/* blk-mq */
+	};
 	struct list_head	timeout_list;
 
 	struct list_head	icq_list;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] blk-mq: use a delayed work item for timeouts
  2015-10-12 19:29 ` Christoph Hellwig
@ 2015-10-12 19:34   ` Jens Axboe
  -1 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 19:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: hare, linux-scsi, linux-nvme

On 10/12/2015 01:29 PM, Christoph Hellwig wrote:
> For some pending NVMe work I'd really love to be able to get my timeouts
> from process context.  So far it seems only SCSI and NVMe use the blk-mq
> timeout handler, and both don't seem to be particularly excited about
> being called from timer context.  Does anyone have an objection against
> the patch below that switches it to use a delayed work item?  I could
> make use of this quickly for NVMe, but for SCSI we still have to deal
> with the old request code which can't be switched to a delayed work
> as easily.

No that's definitely fine with me, imho most error handling callbacks 
should be in process context for ease of use in the driver.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 19:34   ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 19:34 UTC (permalink / raw)


On 10/12/2015 01:29 PM, Christoph Hellwig wrote:
> For some pending NVMe work I'd really love to be able to get my timeouts
> from process context.  So far it seems only SCSI and NVMe use the blk-mq
> timeout handler, and both don't seem to be particularly excited about
> being called from timer context.  Does anyone have an objection against
> the patch below that switches it to use a delayed work item?  I could
> make use of this quickly for NVMe, but for SCSI we still have to deal
> with the old request code which can't be switched to a delayed work
> as easily.

No that's definitely fine with me, imho most error handling callbacks 
should be in process context for ease of use in the driver.


-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] blk-mq: use a delayed work item for timeouts
  2015-10-12 19:34   ` Jens Axboe
@ 2015-10-12 20:08     ` Jens Axboe
  -1 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 20:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: hare, linux-scsi, linux-nvme

On 10/12/2015 01:34 PM, Jens Axboe wrote:
> On 10/12/2015 01:29 PM, Christoph Hellwig wrote:
>> For some pending NVMe work I'd really love to be able to get my timeouts
>> from process context.  So far it seems only SCSI and NVMe use the blk-mq
>> timeout handler, and both don't seem to be particularly excited about
>> being called from timer context.  Does anyone have an objection against
>> the patch below that switches it to use a delayed work item?  I could
>> make use of this quickly for NVMe, but for SCSI we still have to deal
>> with the old request code which can't be switched to a delayed work
>> as easily.
>
> No that's definitely fine with me, imho most error handling callbacks
> should be in process context for ease of use in the driver.

Took a closer look. The patch looks incomplete. The hot path for blk-mq 
is blk_add_timer(), looks like you left that one alone in the conversion?

Might be easier to just leave the timer alone, and if it actually fires 
_and_ we have to do something, punt to a workqueue instead of invoking 
the timeout handler directly.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 20:08     ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 20:08 UTC (permalink / raw)


On 10/12/2015 01:34 PM, Jens Axboe wrote:
> On 10/12/2015 01:29 PM, Christoph Hellwig wrote:
>> For some pending NVMe work I'd really love to be able to get my timeouts
>> from process context.  So far it seems only SCSI and NVMe use the blk-mq
>> timeout handler, and both don't seem to be particularly excited about
>> being called from timer context.  Does anyone have an objection against
>> the patch below that switches it to use a delayed work item?  I could
>> make use of this quickly for NVMe, but for SCSI we still have to deal
>> with the old request code which can't be switched to a delayed work
>> as easily.
>
> No that's definitely fine with me, imho most error handling callbacks
> should be in process context for ease of use in the driver.

Took a closer look. The patch looks incomplete. The hot path for blk-mq 
is blk_add_timer(), looks like you left that one alone in the conversion?

Might be easier to just leave the timer alone, and if it actually fires 
_and_ we have to do something, punt to a workqueue instead of invoking 
the timeout handler directly.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] blk-mq: use a delayed work item for timeouts
  2015-10-12 20:08     ` Jens Axboe
@ 2015-10-12 20:22       ` Christoph Hellwig
  -1 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-10-12 20:22 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Christoph Hellwig, hare, linux-scsi, linux-nvme

On Mon, Oct 12, 2015 at 02:08:04PM -0600, Jens Axboe wrote:
>> No that's definitely fine with me, imho most error handling callbacks
>> should be in process context for ease of use in the driver.
>
> Took a closer look. The patch looks incomplete. The hot path for blk-mq is 
> blk_add_timer(), looks like you left that one alone in the conversion?

Oh, damn.  I had that part in my initial version that also crudely
converted the old request code and dropped a bit too much.  That should
definitely do the queue_delayed_work.

> Might be easier to just leave the timer alone, and if it actually fires 
> _and_ we have to do something, punt to a workqueue instead of invoking the 
> timeout handler directly.

queue_delayed_work just assigns two additional fields, then sets
timer->expires and does an add_timer.  So it's the generic implementation
of your above scheme.  I'd much rather use the generic version if
possible instead of trying to recreate it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 20:22       ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-10-12 20:22 UTC (permalink / raw)


On Mon, Oct 12, 2015 at 02:08:04PM -0600, Jens Axboe wrote:
>> No that's definitely fine with me, imho most error handling callbacks
>> should be in process context for ease of use in the driver.
>
> Took a closer look. The patch looks incomplete. The hot path for blk-mq is 
> blk_add_timer(), looks like you left that one alone in the conversion?

Oh, damn.  I had that part in my initial version that also crudely
converted the old request code and dropped a bit too much.  That should
definitely do the queue_delayed_work.

> Might be easier to just leave the timer alone, and if it actually fires 
> _and_ we have to do something, punt to a workqueue instead of invoking the 
> timeout handler directly.

queue_delayed_work just assigns two additional fields, then sets
timer->expires and does an add_timer.  So it's the generic implementation
of your above scheme.  I'd much rather use the generic version if
possible instead of trying to recreate it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, RFC] blk-mq: use a delayed work item for timeouts
  2015-10-12 20:22       ` Christoph Hellwig
@ 2015-10-12 20:26         ` Jens Axboe
  -1 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 20:26 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: hare, linux-scsi, linux-nvme

On 10/12/2015 02:22 PM, Christoph Hellwig wrote:
> On Mon, Oct 12, 2015 at 02:08:04PM -0600, Jens Axboe wrote:
>>> No that's definitely fine with me, imho most error handling callbacks
>>> should be in process context for ease of use in the driver.
>>
>> Took a closer look. The patch looks incomplete. The hot path for blk-mq is
>> blk_add_timer(), looks like you left that one alone in the conversion?
>
> Oh, damn.  I had that part in my initial version that also crudely
> converted the old request code and dropped a bit too much.  That should
> definitely do the queue_delayed_work.

Yep

>> Might be easier to just leave the timer alone, and if it actually fires
>> _and_ we have to do something, punt to a workqueue instead of invoking the
>> timeout handler directly.
>
> queue_delayed_work just assigns two additional fields, then sets
> timer->expires and does an add_timer.  So it's the generic implementation
> of your above scheme.  I'd much rather use the generic version if
> possible instead of trying to recreate it.

I agree, converting to delayed work in general is the cleaner solution. 
The hot path is really NOT doing anything at all, that's the usual path. 
If it isn't, then we've screwed up. And the conversion to 
delayed_work_pending() from timer_pending() looks fine as well, that's 
another important piece.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, RFC] blk-mq: use a delayed work item for timeouts
@ 2015-10-12 20:26         ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-10-12 20:26 UTC (permalink / raw)


On 10/12/2015 02:22 PM, Christoph Hellwig wrote:
> On Mon, Oct 12, 2015 at 02:08:04PM -0600, Jens Axboe wrote:
>>> No that's definitely fine with me, imho most error handling callbacks
>>> should be in process context for ease of use in the driver.
>>
>> Took a closer look. The patch looks incomplete. The hot path for blk-mq is
>> blk_add_timer(), looks like you left that one alone in the conversion?
>
> Oh, damn.  I had that part in my initial version that also crudely
> converted the old request code and dropped a bit too much.  That should
> definitely do the queue_delayed_work.

Yep

>> Might be easier to just leave the timer alone, and if it actually fires
>> _and_ we have to do something, punt to a workqueue instead of invoking the
>> timeout handler directly.
>
> queue_delayed_work just assigns two additional fields, then sets
> timer->expires and does an add_timer.  So it's the generic implementation
> of your above scheme.  I'd much rather use the generic version if
> possible instead of trying to recreate it.

I agree, converting to delayed work in general is the cleaner solution. 
The hot path is really NOT doing anything at all, that's the usual path. 
If it isn't, then we've screwed up. And the conversion to 
delayed_work_pending() from timer_pending() looks fine as well, that's 
another important piece.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-10-12 20:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-12 19:29 [PATCH, RFC] blk-mq: use a delayed work item for timeouts Christoph Hellwig
2015-10-12 19:29 ` Christoph Hellwig
2015-10-12 19:34 ` Jens Axboe
2015-10-12 19:34   ` Jens Axboe
2015-10-12 20:08   ` Jens Axboe
2015-10-12 20:08     ` Jens Axboe
2015-10-12 20:22     ` Christoph Hellwig
2015-10-12 20:22       ` Christoph Hellwig
2015-10-12 20:26       ` Jens Axboe
2015-10-12 20:26         ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.