All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] blk-mq: Allow blocking queue tag iter callbacks
@ 2018-09-24 21:09 Keith Busch
  2018-09-25  2:11 ` jianchao.wang
  0 siblings, 1 reply; 7+ messages in thread
From: Keith Busch @ 2018-09-24 21:09 UTC (permalink / raw)
  To: Linux Block; +Cc: Jens Axboe, Bart Van Assche, Keith Busch, Jianchao Wang

A recent commit had tag iterator callbacks run under the rcu read
lock. Existing callbacks exist that do not satisy the rcu non-blocking
requirement.

The commit intended to prevent an iterator from accessing a queue that's
being modified. This patch fixes the original issue by taking a queue
reference if the queue is not frozen instead of a rcu read lock.

Fixes: f5bbbbe4d6357 ("blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter")
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 block/blk-mq-tag.c | 30 +++++++++++++++++-------------
 block/blk-mq-tag.h |  2 +-
 block/blk-mq.c     | 22 +---------------------
 3 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 94e1ed667b6e..63542642a017 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -314,24 +314,27 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
-void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
+bool blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	/*
-	 * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
-	 * queue_hw_ctx after freeze the queue. So we could use q_usage_counter
-	 * to avoid race with it. __blk_mq_update_nr_hw_queues will users
-	 * synchronize_rcu to ensure all of the users go out of the critical
-	 * section below and see zeroed q_usage_counter.
+	/* A deadlock might occur if a request is stuck requiring a
+	 * timeout at the same time a queue freeze is waiting
+	 * completion, since the timeout code would not be able to
+	 * acquire the queue reference here.
+	 *
+	 * That's why we don't use blk_queue_enter here; instead, we use
+	 * percpu_ref_tryget directly, because we need to be able to
+	 * obtain a reference even in the short window between the queue
+	 * starting to freeze, by dropping the first reference in
+	 * blk_freeze_queue_start, and the moment the last request is
+	 * consumed, marked by the instant q_usage_counter reaches
+	 * zero.
 	 */
-	rcu_read_lock();
-	if (percpu_ref_is_zero(&q->q_usage_counter)) {
-		rcu_read_unlock();
-		return;
-	}
+	if (!percpu_ref_tryget(&q->q_usage_counter))
+		return false;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		struct blk_mq_tags *tags = hctx->tags;
@@ -347,7 +350,8 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
 		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
 	}
-	rcu_read_unlock();
+	blk_queue_exit(q);
+	return true;
 }
 
 static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 61deab0b5a5a..36b3bc90e867 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -33,7 +33,7 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
-void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
+bool blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv);
 
 static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 85a1c1a59c72..019f9b169887 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -848,24 +848,9 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	/* A deadlock might occur if a request is stuck requiring a
-	 * timeout at the same time a queue freeze is waiting
-	 * completion, since the timeout code would not be able to
-	 * acquire the queue reference here.
-	 *
-	 * That's why we don't use blk_queue_enter here; instead, we use
-	 * percpu_ref_tryget directly, because we need to be able to
-	 * obtain a reference even in the short window between the queue
-	 * starting to freeze, by dropping the first reference in
-	 * blk_freeze_queue_start, and the moment the last request is
-	 * consumed, marked by the instant q_usage_counter reaches
-	 * zero.
-	 */
-	if (!percpu_ref_tryget(&q->q_usage_counter))
+	if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
 		return;
 
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
-
 	if (next != 0) {
 		mod_timer(&q->timeout, next);
 	} else {
@@ -881,7 +866,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
 				blk_mq_tag_idle(hctx);
 		}
 	}
-	blk_queue_exit(q);
 }
 
 struct flush_busy_ctx_data {
@@ -2974,10 +2958,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue(q);
-	/*
-	 * Sync with blk_mq_queue_tag_busy_iter.
-	 */
-	synchronize_rcu();
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
-- 
2.14.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-24 21:09 [PATCH] blk-mq: Allow blocking queue tag iter callbacks Keith Busch
@ 2018-09-25  2:11 ` jianchao.wang
  2018-09-25  2:20   ` Bart Van Assche
  0 siblings, 1 reply; 7+ messages in thread
From: jianchao.wang @ 2018-09-25  2:11 UTC (permalink / raw)
  To: Keith Busch, Linux Block; +Cc: Jens Axboe, Bart Van Assche

Hi Keith

On 09/25/2018 05:09 AM, Keith Busch wrote:
> -	/* A deadlock might occur if a request is stuck requiring a
> -	 * timeout at the same time a queue freeze is waiting
> -	 * completion, since the timeout code would not be able to
> -	 * acquire the queue reference here.
> -	 *
> -	 * That's why we don't use blk_queue_enter here; instead, we use
> -	 * percpu_ref_tryget directly, because we need to be able to
> -	 * obtain a reference even in the short window between the queue
> -	 * starting to freeze, by dropping the first reference in
> -	 * blk_freeze_queue_start, and the moment the last request is
> -	 * consumed, marked by the instant q_usage_counter reaches
> -	 * zero.
> -	 */
> -	if (!percpu_ref_tryget(&q->q_usage_counter))
> +	if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
>  		return;

We cannot discard the percpu_ref_tryget here.

There following code in blk_mq_timeout_work still need it:

	if (next != 0) {
		mod_timer(&q->timeout, next);
	} else {
		queue_for_each_hw_ctx(q, hctx, i) {
			/* the hctx may be unmapped, so check it here */
			if (blk_mq_hw_queue_mapped(hctx))
				blk_mq_tag_idle(hctx);
		}
	}

Thanks
Jianchao

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-25  2:11 ` jianchao.wang
@ 2018-09-25  2:20   ` Bart Van Assche
  2018-09-25  2:39     ` jianchao.wang
  0 siblings, 1 reply; 7+ messages in thread
From: Bart Van Assche @ 2018-09-25  2:20 UTC (permalink / raw)
  To: jianchao.wang, Keith Busch, Linux Block; +Cc: Jens Axboe

On 9/24/18 7:11 PM, jianchao.wang wrote:
> Hi Keith
> 
> On 09/25/2018 05:09 AM, Keith Busch wrote:
>> -	/* A deadlock might occur if a request is stuck requiring a
>> -	 * timeout at the same time a queue freeze is waiting
>> -	 * completion, since the timeout code would not be able to
>> -	 * acquire the queue reference here.
>> -	 *
>> -	 * That's why we don't use blk_queue_enter here; instead, we use
>> -	 * percpu_ref_tryget directly, because we need to be able to
>> -	 * obtain a reference even in the short window between the queue
>> -	 * starting to freeze, by dropping the first reference in
>> -	 * blk_freeze_queue_start, and the moment the last request is
>> -	 * consumed, marked by the instant q_usage_counter reaches
>> -	 * zero.
>> -	 */
>> -	if (!percpu_ref_tryget(&q->q_usage_counter))
>> +	if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
>>   		return;
> 
> We cannot discard the percpu_ref_tryget here.
> 
> There following code in blk_mq_timeout_work still need it:
> 
> 	if (next != 0) {
> 		mod_timer(&q->timeout, next);
> 	} else {
> 		queue_for_each_hw_ctx(q, hctx, i) {
> 			/* the hctx may be unmapped, so check it here */
> 			if (blk_mq_hw_queue_mapped(hctx))
> 				blk_mq_tag_idle(hctx);
> 		}
> 	}

Hi Jianchao,

Had you noticed that the percpu_ref_tryget() call has been moved into
blk_mq_queue_tag_busy_iter()?

Bart.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-25  2:20   ` Bart Van Assche
@ 2018-09-25  2:39     ` jianchao.wang
  2018-09-25 14:39       ` Keith Busch
  0 siblings, 1 reply; 7+ messages in thread
From: jianchao.wang @ 2018-09-25  2:39 UTC (permalink / raw)
  To: Bart Van Assche, Keith Busch, Linux Block; +Cc: Jens Axboe

Hi Bart

On 09/25/2018 10:20 AM, Bart Van Assche wrote:
> On 9/24/18 7:11 PM, jianchao.wang wrote:
>> Hi Keith
>>
>> On 09/25/2018 05:09 AM, Keith Busch wrote:
>>> -    /* A deadlock might occur if a request is stuck requiring a
>>> -     * timeout at the same time a queue freeze is waiting
>>> -     * completion, since the timeout code would not be able to
>>> -     * acquire the queue reference here.
>>> -     *
>>> -     * That's why we don't use blk_queue_enter here; instead, we use
>>> -     * percpu_ref_tryget directly, because we need to be able to
>>> -     * obtain a reference even in the short window between the queue
>>> -     * starting to freeze, by dropping the first reference in
>>> -     * blk_freeze_queue_start, and the moment the last request is
>>> -     * consumed, marked by the instant q_usage_counter reaches
>>> -     * zero.
>>> -     */
>>> -    if (!percpu_ref_tryget(&q->q_usage_counter))
>>> +    if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
>>>           return;
>>
>> We cannot discard the percpu_ref_tryget here.
>>
>> There following code in blk_mq_timeout_work still need it:
>>
>>     if (next != 0) {
>>         mod_timer(&q->timeout, next);
>>     } else {
>>         queue_for_each_hw_ctx(q, hctx, i) {
>>             /* the hctx may be unmapped, so check it here */
>>             if (blk_mq_hw_queue_mapped(hctx))
>>                 blk_mq_tag_idle(hctx);
>>         }
>>     }
> 
> Hi Jianchao,
> 
> Had you noticed that the percpu_ref_tryget() call has been moved into
> blk_mq_queue_tag_busy_iter()?
> 

Yes.

But the issue is the left part of blk_mq_timeout_work is moved out of protection of q refcount.

Thanks
Jianchao

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-25  2:39     ` jianchao.wang
@ 2018-09-25 14:39       ` Keith Busch
  2018-09-25 15:14         ` Bart Van Assche
  0 siblings, 1 reply; 7+ messages in thread
From: Keith Busch @ 2018-09-25 14:39 UTC (permalink / raw)
  To: jianchao.wang; +Cc: Bart Van Assche, Linux Block, Jens Axboe

On Tue, Sep 25, 2018 at 10:39:46AM +0800, jianchao.wang wrote:
> Hi Bart
> 
> On 09/25/2018 10:20 AM, Bart Van Assche wrote:
> > On 9/24/18 7:11 PM, jianchao.wang wrote:
> >> Hi Keith
> >>
> >> On 09/25/2018 05:09 AM, Keith Busch wrote:
> >>> -��� /* A deadlock might occur if a request is stuck requiring a
> >>> -���� * timeout at the same time a queue freeze is waiting
> >>> -���� * completion, since the timeout code would not be able to
> >>> -���� * acquire the queue reference here.
> >>> -���� *
> >>> -���� * That's why we don't use blk_queue_enter here; instead, we use
> >>> -���� * percpu_ref_tryget directly, because we need to be able to
> >>> -���� * obtain a reference even in the short window between the queue
> >>> -���� * starting to freeze, by dropping the first reference in
> >>> -���� * blk_freeze_queue_start, and the moment the last request is
> >>> -���� * consumed, marked by the instant q_usage_counter reaches
> >>> -���� * zero.
> >>> -���� */
> >>> -��� if (!percpu_ref_tryget(&q->q_usage_counter))
> >>> +��� if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
> >>> ��������� return;
> >>
> >> We cannot discard the percpu_ref_tryget here.
> >>
> >> There following code in blk_mq_timeout_work still need it:
> >>
> >> ����if (next != 0) {
> >> ������� mod_timer(&q->timeout, next);
> >> ����} else {
> >> ������� queue_for_each_hw_ctx(q, hctx, i) {
> >> ����������� /* the hctx may be unmapped, so check it here */
> >> ����������� if (blk_mq_hw_queue_mapped(hctx))
> >> ��������������� blk_mq_tag_idle(hctx);
> >> ������� }
> >> ����}
> > 
> > Hi Jianchao,
> > 
> > Had you noticed that the percpu_ref_tryget() call has been moved into
> > blk_mq_queue_tag_busy_iter()?
> > 
> 
> Yes.
> 
> But the issue is the left part of blk_mq_timeout_work is moved out of protection of q refcount.

I'm not sure what you mean by "left part". The only part that isn't
outside the reference with this patch is the part Bart pointed out.

This looks like it may be fixed by either moving the refcount back up a
level to all the callers of blk_mq_queue_tag_busy_iter, or add
cancel_work_sync(&q->timeout_work) to __blk_mq_update_nr_hw_queues after
the freeze.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-25 14:39       ` Keith Busch
@ 2018-09-25 15:14         ` Bart Van Assche
  2018-09-25 15:47           ` Keith Busch
  0 siblings, 1 reply; 7+ messages in thread
From: Bart Van Assche @ 2018-09-25 15:14 UTC (permalink / raw)
  To: Keith Busch, jianchao.wang; +Cc: Linux Block, Jens Axboe

On Tue, 2018-09-25 at 08:39 -0600, Keith Busch wrote:
> On Tue, Sep 25, 2018 at 10:39:46AM +0800, jianchao.wang wrote:
> > But the issue is the left part of blk_mq_timeout_work is moved out of protection of q refcount.
> 
> I'm not sure what you mean by "left part". The only part that isn't
> outside the reference with this patch is the part Bart pointed out.
> 
> This looks like it may be fixed by either moving the refcount back up a
> level to all the callers of blk_mq_queue_tag_busy_iter, or add
> cancel_work_sync(&q->timeout_work) to __blk_mq_update_nr_hw_queues after
> the freeze.

Hi Keith,

How about applying the following (untested) patch on top of your patch?

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 019f9b169887..099e203b5213 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -851,6 +851,9 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
 		return;
 
+	if (!percpu_ref_tryget(&q->q_usage_counter))
+		return;
+
 	if (next != 0) {
 		mod_timer(&q->timeout, next);
 	} else {
@@ -866,6 +869,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
 				blk_mq_tag_idle(hctx);
 		}
 	}
+	blk_queue_exit(q);
 }

Thanks,

Bart.

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] blk-mq: Allow blocking queue tag iter callbacks
  2018-09-25 15:14         ` Bart Van Assche
@ 2018-09-25 15:47           ` Keith Busch
  0 siblings, 0 replies; 7+ messages in thread
From: Keith Busch @ 2018-09-25 15:47 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: jianchao.wang, Linux Block, Jens Axboe

On Tue, Sep 25, 2018 at 08:14:45AM -0700, Bart Van Assche wrote:
> On Tue, 2018-09-25 at 08:39 -0600, Keith Busch wrote:
> > On Tue, Sep 25, 2018 at 10:39:46AM +0800, jianchao.wang wrote:
> > > But the issue is the left part of blk_mq_timeout_work is moved out of protection of q refcount.
> > 
> > I'm not sure what you mean by "left part". The only part that isn't
> > outside the reference with this patch is the part Bart pointed out.
> > 
> > This looks like it may be fixed by either moving the refcount back up a
> > level to all the callers of blk_mq_queue_tag_busy_iter, or add
> > cancel_work_sync(&q->timeout_work) to __blk_mq_update_nr_hw_queues after
> > the freeze.
> 
> Hi Keith,
> 
> How about applying the following (untested) patch on top of your patch?

Yep, this works. I can fold in a v2.
 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 019f9b169887..099e203b5213 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -851,6 +851,9 @@ static void blk_mq_timeout_work(struct work_struct *work)
>  	if (!blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next))
>  		return;
>  
> +	if (!percpu_ref_tryget(&q->q_usage_counter))
> +		return;
> +
>  	if (next != 0) {
>  		mod_timer(&q->timeout, next);
>  	} else {
> @@ -866,6 +869,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
>  				blk_mq_tag_idle(hctx);
>  		}
>  	}
> +	blk_queue_exit(q);
>  }
> 
> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-09-25 20:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-24 21:09 [PATCH] blk-mq: Allow blocking queue tag iter callbacks Keith Busch
2018-09-25  2:11 ` jianchao.wang
2018-09-25  2:20   ` Bart Van Assche
2018-09-25  2:39     ` jianchao.wang
2018-09-25 14:39       ` Keith Busch
2018-09-25 15:14         ` Bart Van Assche
2018-09-25 15:47           ` Keith Busch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.