From: Dan Williams <dan.j.williams@intel.com>
To: axboe@fb.com
Cc: Jens Axboe <axboe@kernel.dk>,
	jack@suse.cz, linux-nvdimm@ml01.01.org, david@fromorbit.com,
	linux-kernel@vger.kernel.org, ross.zwisler@linux.intel.com,
	hch@lst.de
Subject: [PATCH v3 09/15] block: notify queue death confirmation
Date: Sun, 01 Nov 2015 23:30:31 -0500
Message-ID: <20151102043031.6610.41822.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <20151102042941.6610.27784.stgit@dwillia2-desk3.amr.corp.intel.com>

The pmem driver arranges for references to be taken against the queue
while pages it allocated via devm_memremap_pages() are in use.  At
shutdown time, before those pages can be deallocated, they need to be
unmapped and guaranteed to be idle.  The unmap scan can only proceed
once we are certain that no new page references will be taken.  Once
the block queue's percpu_ref is confirmed dead, the dax core will cease
allowing new references and we can free these "device" pages.
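
For reference, the intended driver-side shutdown ordering looks roughly
like the minimal sketch below.  This is not part of the patch:
pmem_shutdown() and the pmem_device fields are hypothetical, the pmem
changes themselves land later in this series, and the unmap/free step
is only introduced by patch 10/15.

    /*
     * Hypothetical pmem-style teardown; only the block layer calls
     * below are real APIs (blk_wait_queue_dead() is added by this
     * patch).
     */
    static void pmem_shutdown(struct pmem_device *pmem)
    {
            struct request_queue *q = pmem->pmem_queue;

            blk_set_queue_dying(q);

            /*
             * Kill q_usage_counter.  When the kill is confirmed,
             * blk_confirm_queue_death() sets q->q_usage_dead and
             * wakes q_freeze_wq.
             */
            blk_mq_freeze_queue_start(q);

            /* Sleep until the confirm callback has fired. */
            blk_wait_queue_dead(q);

            /*
             * No new q_usage_counter references can be taken past
             * this point, so the unmap scan can run and the
             * devm_memremap_pages() pages can be freed (see patch
             * 10/15, devm_memunmap_pages()).
             */
    }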

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 block/blk-core.c       |   12 +++++++++---
 block/blk-mq.c         |   19 +++++++++++++++----
 include/linux/blkdev.h |    4 +++-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 6ebe33ed5154..5159946a2b41 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -516,6 +516,12 @@ void blk_set_queue_dying(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
+void blk_wait_queue_dead(struct request_queue *q)
+{
+	wait_event(q->q_freeze_wq, q->q_usage_dead);
+}
+EXPORT_SYMBOL(blk_wait_queue_dead);
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -641,7 +647,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
 		if (!(gfp & __GFP_WAIT))
 			return -EBUSY;
 
-		ret = wait_event_interruptible(q->mq_freeze_wq,
+		ret = wait_event_interruptible(q->q_freeze_wq,
 				!atomic_read(&q->mq_freeze_depth) ||
 				blk_queue_dying(q));
 		if (blk_queue_dying(q))
@@ -661,7 +667,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 	struct request_queue *q =
 		container_of(ref, struct request_queue, q_usage_counter);
 
-	wake_up_all(&q->mq_freeze_wq);
+	wake_up_all(&q->q_freeze_wq);
 }
 
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
@@ -723,7 +729,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->bypass_depth = 1;
 	__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
 
-	init_waitqueue_head(&q->mq_freeze_wq);
+	init_waitqueue_head(&q->q_freeze_wq);
 
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6c240712553a..e0417febbcd4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,13 +78,23 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
+static void blk_confirm_queue_death(struct percpu_ref *ref)
+{
+	struct request_queue *q = container_of(ref, typeof(*q),
+			q_usage_counter);
+
+	q->q_usage_dead = 1;
+	wake_up_all(&q->q_freeze_wq);
+}
+
 void blk_mq_freeze_queue_start(struct request_queue *q)
 {
 	int freeze_depth;
 
 	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
 	if (freeze_depth == 1) {
-		percpu_ref_kill(&q->q_usage_counter);
+		percpu_ref_kill_and_confirm(&q->q_usage_counter,
+				blk_confirm_queue_death);
 		blk_mq_run_hw_queues(q, false);
 	}
 }
@@ -92,7 +102,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
-	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
+	wait_event(q->q_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
 }
 
 /*
@@ -130,7 +140,8 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 	WARN_ON_ONCE(freeze_depth < 0);
 	if (!freeze_depth) {
 		percpu_ref_reinit(&q->q_usage_counter);
-		wake_up_all(&q->mq_freeze_wq);
+		q->q_usage_dead = 0;
+		wake_up_all(&q->q_freeze_wq);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -149,7 +160,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
 	 * dying, we need to ensure that processes currently waiting on
 	 * the queue are notified as well.
 	 */
-	wake_up_all(&q->mq_freeze_wq);
+	wake_up_all(&q->q_freeze_wq);
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b78e01542e9e..e121e5e0c6ac 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -431,6 +431,7 @@ struct request_queue {
 	 */
 	unsigned int		flush_flags;
 	unsigned int		flush_not_queueable:1;
+	unsigned int		q_usage_dead:1;
 	struct blk_flush_queue	*fq;
 
 	struct list_head	requeue_list;
@@ -453,7 +454,7 @@ struct request_queue {
 	struct throtl_data *td;
 #endif
 	struct rcu_head		rcu_head;
-	wait_queue_head_t	mq_freeze_wq;
+	wait_queue_head_t	q_freeze_wq;
 	struct percpu_ref	q_usage_counter;
 	struct list_head	all_q_node;
 
@@ -953,6 +954,7 @@ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 						      request_fn_proc *, spinlock_t *);
+extern void blk_wait_queue_dead(struct request_queue *q);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);

