All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] block: per-distpatch_queue flush machinery
@ 2014-09-09 13:05 Ming Lei
  2014-09-09 13:05 ` [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
                   ` (8 more replies)
  0 siblings, 9 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig

Hi,

As discussed recently, and especially as suggested by Christoph, this patchset
implements per-dispatch_queue flush machinery, so that:

	- current init_request and exit_request callbacks can
	cover flush request too, then the ugly and buggy copying
	way of initializing flush request's pdu can be fixed
    
	- flushing performance gets improved in case of multi hw-queue

About 70% throughput improvement is observed in sync write/randwrite
over multi dispatch-queue virtio-blk, see details in commit log
of patch 8/8.

This patchset can be pulled from below tree too:

	git://kernel.ubuntu.com/ming/linux.git v3.17-block-dev


 block/blk-core.c       |   11 +--
 block/blk-flush.c      |  239 +++++++++++++++++++++++++++++++++++++++---------
 block/blk-mq.c         |   27 +++---
 block/blk-mq.h         |    1 -
 block/blk-sysfs.c      |    4 +-
 block/blk.h            |   35 ++++++-
 include/linux/blk-mq.h |    2 +
 include/linux/blkdev.h |   10 +-
 8 files changed, 251 insertions(+), 78 deletions(-)

Thanks,
--
Ming Lei


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush()
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:35   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 2/8] block: introduce blk_init_flush and its pair Ming Lei
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

It is reasonable to allocate flush req in blk_mq_init_flush().

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c |   11 ++++++++++-
 block/blk-mq.c    |   16 ++++++----------
 block/blk-mq.h    |    2 +-
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3cb5e9e..75ca6cd0 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -474,7 +474,16 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-void blk_mq_init_flush(struct request_queue *q)
+int blk_mq_init_flush(struct request_queue *q)
 {
+	struct blk_mq_tag_set *set = q->tag_set;
+
 	spin_lock_init(&q->mq_flush_lock);
+
+	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
+				set->cmd_size, cache_line_size()),
+				GFP_KERNEL);
+	if (!q->flush_rq)
+		return -ENOMEM;
+	return 0;
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b5af123..d500793 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1830,17 +1830,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (set->ops->complete)
 		blk_queue_softirq_done(q, set->ops->complete);
 
-	blk_mq_init_flush(q);
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
-				set->cmd_size, cache_line_size()),
-				GFP_KERNEL);
-	if (!q->flush_rq)
-		goto err_hw;
-
 	if (blk_mq_init_hw_queues(q, set))
-		goto err_flush_rq;
+		goto err_hw;
 
 	mutex_lock(&all_q_mutex);
 	list_add_tail(&q->all_q_node, &all_q_list);
@@ -1848,12 +1841,15 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	blk_mq_add_queue_tag_set(set, q);
 
+	if (blk_mq_init_flush(q))
+		goto err_hw_queues;
+
 	blk_mq_map_swqueue(q);
 
 	return q;
 
-err_flush_rq:
-	kfree(q->flush_rq);
+err_hw_queues:
+	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
 	blk_cleanup_queue(q);
 err_hctxs:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ca4964a..b0bd9bc 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,7 +27,7 @@ struct blk_mq_ctx {
 
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
-void blk_mq_init_flush(struct request_queue *q);
+int blk_mq_init_flush(struct request_queue *q);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/8] block: introduce blk_init_flush and its pair
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
  2014-09-09 13:05 ` [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:36   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 3/8] block: move flush initialized stuff to blk_flush_init Ming Lei
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

These two functions are introduced to initialize and de-initialize
flush stuff centrally.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-core.c  |    5 ++---
 block/blk-flush.c |   19 ++++++++++++++++++-
 block/blk-mq.c    |    2 +-
 block/blk-mq.h    |    1 -
 block/blk-sysfs.c |    4 ++--
 block/blk.h       |    3 +++
 6 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 6946a42..0a9d172 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -705,8 +705,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (!q)
 		return NULL;
 
-	q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
-	if (!q->flush_rq)
+	if (blk_init_flush(q))
 		return NULL;
 
 	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
@@ -742,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	return q;
 
 fail:
-	kfree(q->flush_rq);
+	blk_exit_flush(q);
 	return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 75ca6cd0..6932ee8 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -474,7 +474,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-int blk_mq_init_flush(struct request_queue *q)
+static int blk_mq_init_flush(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 
@@ -487,3 +487,20 @@ int blk_mq_init_flush(struct request_queue *q)
 		return -ENOMEM;
 	return 0;
 }
+
+int blk_init_flush(struct request_queue *q)
+{
+	if (q->mq_ops)
+		return blk_mq_init_flush(q);
+
+	q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+	if (!q->flush_rq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void blk_exit_flush(struct request_queue *q)
+{
+	kfree(q->flush_rq);
+}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d500793..706181a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1841,7 +1841,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	blk_mq_add_queue_tag_set(set, q);
 
-	if (blk_mq_init_flush(q))
+	if (blk_init_flush(q))
 		goto err_hw_queues;
 
 	blk_mq_map_swqueue(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b0bd9bc..a39cfa9 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,7 +27,6 @@ struct blk_mq_ctx {
 
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
-int blk_mq_init_flush(struct request_queue *q);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4db5abf..28d3a11 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -517,11 +517,11 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
+	blk_exit_flush(q);
+
 	if (q->mq_ops)
 		blk_mq_free_queue(q);
 
-	kfree(q->flush_rq);
-
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
diff --git a/block/blk.h b/block/blk.h
index 6748c4f..261f734 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -22,6 +22,9 @@ static inline void __blk_get_queue(struct request_queue *q)
 	kobject_get(&q->kobj);
 }
 
+int blk_init_flush(struct request_queue *q);
+void blk_exit_flush(struct request_queue *q);
+
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
 void blk_exit_rl(struct request_list *rl);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/8] block: move flush initialized stuff to blk_flush_init
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
  2014-09-09 13:05 ` [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
  2014-09-09 13:05 ` [PATCH 2/8] block: introduce blk_init_flush and its pair Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:37   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 4/8] block: avoid to use q->flush_rq directly Ming Lei
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

This stuff is always used together with the flush request, so
we can move it safely.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-core.c  |    3 ---
 block/blk-flush.c |    4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0a9d172..222fe84 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -600,9 +600,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 #ifdef CONFIG_BLK_CGROUP
 	INIT_LIST_HEAD(&q->blkg_list);
 #endif
-	INIT_LIST_HEAD(&q->flush_queue[0]);
-	INIT_LIST_HEAD(&q->flush_queue[1]);
-	INIT_LIST_HEAD(&q->flush_data_in_flight);
 	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6932ee8..a5b2a00 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -490,6 +490,10 @@ static int blk_mq_init_flush(struct request_queue *q)
 
 int blk_init_flush(struct request_queue *q)
 {
+	INIT_LIST_HEAD(&q->flush_queue[0]);
+	INIT_LIST_HEAD(&q->flush_queue[1]);
+	INIT_LIST_HEAD(&q->flush_data_in_flight);
+
 	if (q->mq_ops)
 		return blk_mq_init_flush(q);
 
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/8] block: avoid to use q->flush_rq directly
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (2 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 3/8] block: move flush initialized stuff to blk_flush_init Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:38   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery Ming Lei
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

This patch tries to use a local variable to access the flush request,
so that we can convert to per-queue flush machinery a bit more easily.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index a5b2a00..a59dd1a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -225,7 +225,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 
 	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-		q->flush_rq->tag = -1;
+		flush_rq->tag = -1;
 	}
 
 	running = &q->flush_queue[q->flush_running_idx];
@@ -283,6 +283,7 @@ static bool blk_kick_flush(struct request_queue *q)
 	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
 	struct request *first_rq =
 		list_first_entry(pending, struct request, flush.list);
+	struct request *flush_rq = q->flush_rq;
 
 	/* C1 described at the top of this file */
 	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
@@ -300,16 +301,16 @@ static bool blk_kick_flush(struct request_queue *q)
 	 */
 	q->flush_pending_idx ^= 1;
 
-	blk_rq_init(q, q->flush_rq);
+	blk_rq_init(q, flush_rq);
 	if (q->mq_ops)
-		blk_mq_clone_flush_request(q->flush_rq, first_rq);
+		blk_mq_clone_flush_request(flush_rq, first_rq);
 
-	q->flush_rq->cmd_type = REQ_TYPE_FS;
-	q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
-	q->flush_rq->rq_disk = first_rq->rq_disk;
-	q->flush_rq->end_io = flush_end_io;
+	flush_rq->cmd_type = REQ_TYPE_FS;
+	flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	flush_rq->rq_disk = first_rq->rq_disk;
+	flush_rq->end_io = flush_end_io;
 
-	return blk_flush_queue_rq(q->flush_rq, false);
+	return blk_flush_queue_rq(flush_rq, false);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (3 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 4/8] block: avoid to use q->flush_rq directly Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:43   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily Ming Lei
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

This patch introduces 'struct blk_flush_queue' and puts all
flush machinery related stuff into this structure, so that

	- flush implementation details aren't exposed to driver
	- it is easy to convert to per dispatch-queue flush machinery

This patch is basically a mechanical replacement.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-core.c       |    3 +-
 block/blk-flush.c      |  106 ++++++++++++++++++++++++++++++++----------------
 block/blk-mq.c         |   10 +++--
 block/blk.h            |   22 +++++++++-
 include/linux/blkdev.h |   10 +----
 5 files changed, 101 insertions(+), 50 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 222fe84..d278a30 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -390,11 +390,12 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 		 * be drained.  Check all the queues and counters.
 		 */
 		if (drain_all) {
+			struct blk_flush_queue *fq = blk_get_flush_queue(q);
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
 				drain |= q->nr_rqs[i];
 				drain |= q->in_flight[i];
-				drain |= !list_empty(&q->flush_queue[i]);
+				drain |= !list_empty(&fq->flush_queue[i]);
 			}
 		}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a59dd1a..894a149 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -28,7 +28,7 @@
  *
  * The actual execution of flush is double buffered.  Whenever a request
  * needs to execute PRE or POSTFLUSH, it queues at
- * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * fq->flush_queue[fq->flush_pending_idx].  Once certain criteria are met, a
  * flush is issued and the pending_idx is toggled.  When the flush
  * completes, all the requests which were pending are proceeded to the next
  * step.  This allows arbitrary merging of different types of FLUSH/FUA
@@ -157,7 +157,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  * completion and trigger the next step.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
+ * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
  *
  * RETURNS:
  * %true if requests were added to the dispatch queue, %false otherwise.
@@ -166,7 +166,8 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 				   int error)
 {
 	struct request_queue *q = rq->q;
-	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	bool queued = false, kicked;
 
 	BUG_ON(rq->flush.seq & seq);
@@ -182,12 +183,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 	case REQ_FSEQ_POSTFLUSH:
 		/* queue for flush */
 		if (list_empty(pending))
-			q->flush_pending_since = jiffies;
+			fq->flush_pending_since = jiffies;
 		list_move_tail(&rq->flush.list, pending);
 		break;
 
 	case REQ_FSEQ_DATA:
-		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
 		queued = blk_flush_queue_rq(rq, true);
 		break;
 
@@ -222,17 +223,18 @@ static void flush_end_io(struct request *flush_rq, int error)
 	bool queued = false;
 	struct request *rq, *n;
 	unsigned long flags = 0;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	if (q->mq_ops) {
-		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		spin_lock_irqsave(&fq->mq_flush_lock, flags);
 		flush_rq->tag = -1;
 	}
 
-	running = &q->flush_queue[q->flush_running_idx];
-	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
+	running = &fq->flush_queue[fq->flush_running_idx];
+	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
 
 	/* account completion of the flush request */
-	q->flush_running_idx ^= 1;
+	fq->flush_running_idx ^= 1;
 
 	if (!q->mq_ops)
 		elv_completed_request(q, flush_rq);
@@ -256,13 +258,13 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * directly into request_fn may confuse the driver.  Always use
 	 * kblockd.
 	 */
-	if (queued || q->flush_queue_delayed) {
+	if (queued || fq->flush_queue_delayed) {
 		WARN_ON(q->mq_ops);
 		blk_run_queue_async(q);
 	}
-	q->flush_queue_delayed = 0;
+	fq->flush_queue_delayed = 0;
 	if (q->mq_ops)
-		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
 /**
@@ -273,33 +275,34 @@ static void flush_end_io(struct request *flush_rq, int error)
  * Please read the comment at the top of this file for more info.
  *
  * CONTEXT:
- * spin_lock_irq(q->queue_lock or q->mq_flush_lock)
+ * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
  *
  * RETURNS:
  * %true if flush was issued, %false otherwise.
  */
 static bool blk_kick_flush(struct request_queue *q)
 {
-	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	struct request *first_rq =
 		list_first_entry(pending, struct request, flush.list);
-	struct request *flush_rq = q->flush_rq;
+	struct request *flush_rq = fq->flush_rq;
 
 	/* C1 described at the top of this file */
-	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
+	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
 		return false;
 
 	/* C2 and C3 */
-	if (!list_empty(&q->flush_data_in_flight) &&
+	if (!list_empty(&fq->flush_data_in_flight) &&
 	    time_before(jiffies,
-			q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
+			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
 		return false;
 
 	/*
 	 * Issue flush and toggle pending_idx.  This makes pending_idx
 	 * different from running_idx, which means flush is in flight.
 	 */
-	q->flush_pending_idx ^= 1;
+	fq->flush_pending_idx ^= 1;
 
 	blk_rq_init(q, flush_rq);
 	if (q->mq_ops)
@@ -331,6 +334,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	unsigned long flags;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	ctx = rq->mq_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -339,10 +343,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
 	 */
-	spin_lock_irqsave(&q->mq_flush_lock, flags);
+	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
 		blk_mq_run_hw_queue(hctx, true);
-	spin_unlock_irqrestore(&q->mq_flush_lock, flags);
+	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
 /**
@@ -410,11 +414,13 @@ void blk_insert_flush(struct request *rq)
 	rq->cmd_flags |= REQ_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
 	if (q->mq_ops) {
+		struct blk_flush_queue *fq = blk_get_flush_queue(q);
+
 		rq->end_io = mq_flush_data_end_io;
 
-		spin_lock_irq(&q->mq_flush_lock);
+		spin_lock_irq(&fq->mq_flush_lock);
 		blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
-		spin_unlock_irq(&q->mq_flush_lock);
+		spin_unlock_irq(&fq->mq_flush_lock);
 		return;
 	}
 	rq->end_io = flush_data_end_io;
@@ -478,34 +484,64 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 static int blk_mq_init_flush(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
-	spin_lock_init(&q->mq_flush_lock);
+	spin_lock_init(&fq->mq_flush_lock);
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
+	fq->flush_rq = kzalloc(round_up(sizeof(struct request) +
 				set->cmd_size, cache_line_size()),
 				GFP_KERNEL);
-	if (!q->flush_rq)
+	if (!fq->flush_rq)
 		return -ENOMEM;
 	return 0;
 }
 
-int blk_init_flush(struct request_queue *q)
+static void blk_mq_exit_flush(struct request_queue *q)
 {
-	INIT_LIST_HEAD(&q->flush_queue[0]);
-	INIT_LIST_HEAD(&q->flush_queue[1]);
-	INIT_LIST_HEAD(&q->flush_data_in_flight);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	kfree(fq->flush_rq);
+	kfree(fq);
+}
 
-	if (q->mq_ops)
-		return blk_mq_init_flush(q);
+int blk_init_flush(struct request_queue *q)
+{
+	int ret;
+	struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
 
-	q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
-	if (!q->flush_rq)
+	if (!fq)
 		return -ENOMEM;
 
+	q->fq = fq;
+	INIT_LIST_HEAD(&fq->flush_queue[0]);
+	INIT_LIST_HEAD(&fq->flush_queue[1]);
+	INIT_LIST_HEAD(&fq->flush_data_in_flight);
+
+	if (q->mq_ops) {
+		ret = blk_mq_init_flush(q);
+		if (ret)
+			goto failed;
+	} else {
+		ret = -ENOMEM;
+		fq->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+		if (!fq->flush_rq)
+			goto failed;
+	}
+
 	return 0;
+
+ failed:
+	kfree(fq);
+	q->fq = NULL;
+	return ret;
 }
 
 void blk_exit_flush(struct request_queue *q)
 {
-	kfree(q->flush_rq);
+	if (q->mq_ops)
+		blk_mq_exit_flush(q);
+	else {
+		struct blk_flush_queue *fq = blk_get_flush_queue(q);
+		kfree(fq->flush_rq);
+		kfree(fq);
+	}
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 706181a..7782d14 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -508,20 +508,22 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-static inline bool is_flush_request(struct request *rq, unsigned int tag)
+static inline bool is_flush_request(struct request *rq,
+		struct blk_flush_queue *fq, unsigned int tag)
 {
 	return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
-			rq->q->flush_rq->tag == tag);
+			fq->flush_rq->tag == tag);
 }
 
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	struct request *rq = tags->rqs[tag];
+	struct blk_flush_queue *fq = blk_get_flush_queue(rq->q);
 
-	if (!is_flush_request(rq, tag))
+	if (!is_flush_request(rq, fq, tag))
 		return rq;
 
-	return rq->q->flush_rq;
+	return fq->flush_rq;
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
diff --git a/block/blk.h b/block/blk.h
index 261f734..2637349 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -12,11 +12,28 @@
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
+struct blk_flush_queue {
+	unsigned int		flush_queue_delayed:1;
+	unsigned int		flush_pending_idx:1;
+	unsigned int		flush_running_idx:1;
+	unsigned long		flush_pending_since;
+	struct list_head	flush_queue[2];
+	struct list_head	flush_data_in_flight;
+	struct request		*flush_rq;
+	spinlock_t		mq_flush_lock;
+};
+
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
+static inline struct blk_flush_queue *blk_get_flush_queue(
+		struct request_queue *q)
+{
+	return q->fq;
+}
+
 static inline void __blk_get_queue(struct request_queue *q)
 {
 	kobject_get(&q->kobj);
@@ -91,6 +108,7 @@ void blk_insert_flush(struct request *rq);
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	while (1) {
 		if (!list_empty(&q->queue_head)) {
@@ -113,9 +131,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 		 * should be restarted later. Please see flush_end_io() for
 		 * details.
 		 */
-		if (q->flush_pending_idx != q->flush_running_idx &&
+		if (fq->flush_pending_idx != fq->flush_running_idx &&
 				!queue_flush_queueable(q)) {
-			q->flush_queue_delayed = 1;
+			fq->flush_queue_delayed = 1;
 			return NULL;
 		}
 		if (unlikely(blk_queue_bypass(q)) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e267bf0..49f3461 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -36,6 +36,7 @@ struct request;
 struct sg_io_hdr;
 struct bsg_job;
 struct blkcg_gq;
+struct blk_flush_queue;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -455,14 +456,7 @@ struct request_queue {
 	 */
 	unsigned int		flush_flags;
 	unsigned int		flush_not_queueable:1;
-	unsigned int		flush_queue_delayed:1;
-	unsigned int		flush_pending_idx:1;
-	unsigned int		flush_running_idx:1;
-	unsigned long		flush_pending_since;
-	struct list_head	flush_queue[2];
-	struct list_head	flush_data_in_flight;
-	struct request		*flush_rq;
-	spinlock_t		mq_flush_lock;
+	struct blk_flush_queue	*fq;
 
 	struct list_head	requeue_list;
 	spinlock_t		requeue_lock;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (4 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:44   ` Christoph Hellwig
  2014-09-09 13:05 ` [PATCH 7/8] block: introduce 'blk_mq_ctx' parameter to blk_get_flush_queue Ming Lei
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

Figure out the flush queue just once, at the entry points of the flush
machinery and of the request's completion handler, then pass it through.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c |   30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 894a149..a9d6e01 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -91,7 +91,8 @@ enum {
 	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
 };
 
-static bool blk_kick_flush(struct request_queue *q);
+static bool blk_kick_flush(struct request_queue *q,
+			   struct blk_flush_queue *fq);
 
 static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
@@ -150,6 +151,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 /**
  * blk_flush_complete_seq - complete flush sequence
  * @rq: FLUSH/FUA request being sequenced
+ * @fq: flush queue
  * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
  * @error: whether an error occurred
  *
@@ -162,11 +164,11 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  * RETURNS:
  * %true if requests were added to the dispatch queue, %false otherwise.
  */
-static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
-				   int error)
+static bool blk_flush_complete_seq(struct request *rq,
+				   struct blk_flush_queue *fq,
+				   unsigned int seq, int error)
 {
 	struct request_queue *q = rq->q;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	bool queued = false, kicked;
 
@@ -212,7 +214,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 		BUG();
 	}
 
-	kicked = blk_kick_flush(q);
+	kicked = blk_kick_flush(q, fq);
 	return kicked | queued;
 }
 
@@ -244,7 +246,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 		unsigned int seq = blk_flush_cur_seq(rq);
 
 		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
-		queued |= blk_flush_complete_seq(rq, seq, error);
+		queued |= blk_flush_complete_seq(rq, fq, seq, error);
 	}
 
 	/*
@@ -270,6 +272,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
+ * @fq: flush queue
  *
  * Flush related states of @q have changed, consider issuing flush request.
  * Please read the comment at the top of this file for more info.
@@ -280,9 +283,8 @@ static void flush_end_io(struct request *flush_rq, int error)
  * RETURNS:
  * %true if flush was issued, %false otherwise.
  */
-static bool blk_kick_flush(struct request_queue *q)
+static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 {
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
 	struct request *first_rq =
 		list_first_entry(pending, struct request, flush.list);
@@ -319,12 +321,13 @@ static bool blk_kick_flush(struct request_queue *q)
 static void flush_data_end_io(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	/*
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
 	 */
-	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
 		blk_run_queue_async(q);
 }
 
@@ -344,7 +347,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	 * the comment in flush_end_io().
 	 */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
-	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
+	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
 		blk_mq_run_hw_queue(hctx, true);
 	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
@@ -366,6 +369,7 @@ void blk_insert_flush(struct request *rq)
 	struct request_queue *q = rq->q;
 	unsigned int fflags = q->flush_flags;	/* may change, cache */
 	unsigned int policy = blk_flush_policy(fflags, rq);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q);
 
 	/*
 	 * @policy now records what operations need to be done.  Adjust
@@ -414,18 +418,16 @@ void blk_insert_flush(struct request *rq)
 	rq->cmd_flags |= REQ_FLUSH_SEQ;
 	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
 	if (q->mq_ops) {
-		struct blk_flush_queue *fq = blk_get_flush_queue(q);
-
 		rq->end_io = mq_flush_data_end_io;
 
 		spin_lock_irq(&fq->mq_flush_lock);
-		blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
+		blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
 		spin_unlock_irq(&fq->mq_flush_lock);
 		return;
 	}
 	rq->end_io = flush_data_end_io;
 
-	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
+	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
 /**
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 7/8] block: introduce 'blk_mq_ctx' parameter to blk_get_flush_queue
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (5 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 13:05 ` [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery Ming Lei
  2014-09-09 15:20 ` [PATCH 0/8] block: " Christoph Hellwig
  8 siblings, 0 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

This patch adds a 'blk_mq_ctx' parameter to blk_get_flush_queue(),
so that this function can find the corresponding blk_flush_queue
bound to the current mq context, since the flush queue will become
per hw-queue.

For legacy queue, the parameter can be simply 'NULL'.

For the multiqueue case, the parameter should be set to the context
from which the related request originated. With the context
info, the hw queue and related flush queue can be found.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-core.c  |    2 +-
 block/blk-flush.c |   17 ++++++++---------
 block/blk-mq.c    |    3 ++-
 block/blk.h       |    4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d278a30..40a5d37 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -390,7 +390,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 		 * be drained.  Check all the queues and counters.
 		 */
 		if (drain_all) {
-			struct blk_flush_queue *fq = blk_get_flush_queue(q);
+			struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
 				drain |= q->nr_rqs[i];
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a9d6e01..4a445a1 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -225,7 +225,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	bool queued = false;
 	struct request *rq, *n;
 	unsigned long flags = 0;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
 
 	if (q->mq_ops) {
 		spin_lock_irqsave(&fq->mq_flush_lock, flags);
@@ -321,7 +321,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 static void flush_data_end_io(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
 	/*
 	 * After populating an empty queue, kick it to avoid stall.  Read
@@ -335,11 +335,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	unsigned long flags;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
 
-	ctx = rq->mq_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
 	/*
@@ -369,7 +368,7 @@ void blk_insert_flush(struct request *rq)
 	struct request_queue *q = rq->q;
 	unsigned int fflags = q->flush_flags;	/* may change, cache */
 	unsigned int policy = blk_flush_policy(fflags, rq);
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
 	/*
 	 * @policy now records what operations need to be done.  Adjust
@@ -486,7 +485,7 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 static int blk_mq_init_flush(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
 	spin_lock_init(&fq->mq_flush_lock);
 
@@ -500,7 +499,7 @@ static int blk_mq_init_flush(struct request_queue *q)
 
 static void blk_mq_exit_flush(struct request_queue *q)
 {
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 	kfree(fq->flush_rq);
 	kfree(fq);
 }
@@ -542,7 +541,7 @@ void blk_exit_flush(struct request_queue *q)
 	if (q->mq_ops)
 		blk_mq_exit_flush(q);
 	else {
-		struct blk_flush_queue *fq = blk_get_flush_queue(q);
+		struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 		kfree(fq->flush_rq);
 		kfree(fq);
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7782d14..9ca4442 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -518,7 +518,8 @@ static inline bool is_flush_request(struct request *rq,
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	struct request *rq = tags->rqs[tag];
-	struct blk_flush_queue *fq = blk_get_flush_queue(rq->q);
+	/* mq_ctx of flush rq is always cloned from the corresponding req */
+	struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx);
 
 	if (!is_flush_request(rq, fq, tag))
 		return rq;
diff --git a/block/blk.h b/block/blk.h
index 2637349..30f8033 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -29,7 +29,7 @@ extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
 static inline struct blk_flush_queue *blk_get_flush_queue(
-		struct request_queue *q)
+		struct request_queue *q, struct blk_mq_ctx *ctx)
 {
 	return q->fq;
 }
@@ -108,7 +108,7 @@ void blk_insert_flush(struct request *rq);
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q);
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
 	while (1) {
 		if (!list_empty(&q->queue_head)) {
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (6 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 7/8] block: introduce 'blk_mq_ctx' parameter to blk_get_flush_queue Ming Lei
@ 2014-09-09 13:05 ` Ming Lei
  2014-09-09 18:48   ` Christoph Hellwig
  2014-09-09 15:20 ` [PATCH 0/8] block: " Christoph Hellwig
  8 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-09 13:05 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel; +Cc: linux-scsi, Christoph Hellwig, Ming Lei

This patch supports running one single flush machinery for
each blk-mq dispatch queue, so that:

- current init_request and exit_request callbacks can
cover flush request too, then the ugly and buggy way of
initializing flush request's pdu can be fixed

- flushing performance gets improved in case of multi hw-queue

In both fio write and randwrite test over virtio-blk(4 hw queues,
backed by nullblk) with sync=1, ioengine=sync, iodepth=64, numjobs=4,
it is observed that throughput gets increased by 70% in the VM
over my laptop environment.

The multi virtqueue feature isn't merged to QEMU yet, and patches for
the feature can be found in below tree:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.3

And simply passing 'num_queues=4 vectors=5' should be enough to
enable multi queue feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
 block/blk-flush.c      |  141 ++++++++++++++++++++++++++++++++++++++----------
 block/blk.h            |   12 ++++-
 include/linux/blk-mq.h |    2 +
 3 files changed, 125 insertions(+), 30 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 4a445a1..2fc79bf 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -482,57 +482,143 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-static int blk_mq_init_flush(struct request_queue *q)
+static int blk_alloc_flush_queue(struct request_queue *q,
+		struct blk_mq_hw_ctx *hctx,
+		struct blk_flush_queue **pfq)
 {
-	struct blk_mq_tag_set *set = q->tag_set;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+	struct blk_flush_queue *fq;
+	int rq_sz = sizeof(struct request);
 
-	spin_lock_init(&fq->mq_flush_lock);
+	if (hctx) {
+		int cmd_sz = q->tag_set->cmd_size;
+		int node = hctx->numa_node;
+
+		fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
+		if (!fq)
+			goto failed;
+
+		rq_sz = round_up(rq_sz + cmd_sz, cache_line_size());
+		fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
+		if (!fq->flush_rq)
+			goto rq_failed;
+
+		spin_lock_init(&fq->mq_flush_lock);
+	} else {
+		fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+		if (!fq)
+			goto failed;
+
+		fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+		if (!fq->flush_rq)
+			goto rq_failed;
+	}
+
+	INIT_LIST_HEAD(&fq->flush_queue[0]);
+	INIT_LIST_HEAD(&fq->flush_queue[1]);
+	INIT_LIST_HEAD(&fq->flush_data_in_flight);
 
-	fq->flush_rq = kzalloc(round_up(sizeof(struct request) +
-				set->cmd_size, cache_line_size()),
-				GFP_KERNEL);
-	if (!fq->flush_rq)
-		return -ENOMEM;
+	*pfq = fq;
 	return 0;
+
+ rq_failed:
+	kfree(fq);
+ failed:
+	return -ENOMEM;
 }
 
-static void blk_mq_exit_flush(struct request_queue *q)
+static void blk_free_flush_queue(struct blk_flush_queue *fq)
 {
-	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+	if (!fq)
+		return;
 	kfree(fq->flush_rq);
 	kfree(fq);
 }
 
-int blk_init_flush(struct request_queue *q)
+static void __blk_mq_exit_flush(struct request_queue *q,
+		unsigned free_end, unsigned int exit_end)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int k;
+	struct blk_flush_queue *fq;
+	struct blk_mq_tag_set *set = q->tag_set;
+	unsigned start_idx = set->queue_depth;
+
+	queue_for_each_hw_ctx(q, hctx, k) {
+		if (k >= free_end)
+			break;
+
+		fq = hctx->fq;
+		if (k < exit_end && set->ops->exit_request)
+			set->ops->exit_request(set->driver_data,
+					fq->flush_rq, k,
+					start_idx + k);
+
+		blk_free_flush_queue(fq);
+	}
+
+}
+
+static int blk_mq_init_flush(struct request_queue *q)
 {
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i, j = 0;
+	struct blk_flush_queue *fq;
 	int ret;
-	struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+	struct blk_mq_tag_set *set = q->tag_set;
+	unsigned start_idx = set->queue_depth;
 
-	if (!fq)
-		return -ENOMEM;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_alloc_flush_queue(q, hctx, &fq);
+		if (ret)
+			goto fail;
+		hctx->fq = fq;
+	}
 
-	q->fq = fq;
-	INIT_LIST_HEAD(&fq->flush_queue[0]);
-	INIT_LIST_HEAD(&fq->flush_queue[1]);
-	INIT_LIST_HEAD(&fq->flush_data_in_flight);
+	queue_for_each_hw_ctx(q, hctx, j) {
+		fq = hctx->fq;
+		if (set->ops->init_request) {
+			ret = set->ops->init_request(set->driver_data,
+					fq->flush_rq, j, start_idx + j,
+					hctx->numa_node);
+			if (ret)
+				goto fail;
+		}
+	}
+
+	return 0;
+
+ fail:
+	__blk_mq_exit_flush(q, i, j);
+	return ret;
+}
+
+static void blk_mq_exit_flush(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+
+	__blk_mq_exit_flush(q, set->nr_hw_queues, set->nr_hw_queues);
+}
+
+int blk_init_flush(struct request_queue *q)
+{
+	int ret;
 
 	if (q->mq_ops) {
 		ret = blk_mq_init_flush(q);
 		if (ret)
 			goto failed;
 	} else {
-		ret = -ENOMEM;
-		fq->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
-		if (!fq->flush_rq)
+		struct blk_flush_queue *fq;
+
+		ret = blk_alloc_flush_queue(q, NULL, &fq);
+		if (ret)
 			goto failed;
+		q->fq = fq;
 	}
 
 	return 0;
 
  failed:
-	kfree(fq);
-	q->fq = NULL;
 	return ret;
 }
 
@@ -540,9 +626,6 @@ void blk_exit_flush(struct request_queue *q)
 {
 	if (q->mq_ops)
 		blk_mq_exit_flush(q);
-	else {
-		struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-		kfree(fq->flush_rq);
-		kfree(fq);
-	}
+	else
+		blk_free_flush_queue(q->fq);
 }
diff --git a/block/blk.h b/block/blk.h
index 30f8033..9dcc11c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
 #define BLK_INTERNAL_H
 
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
 
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
@@ -31,7 +33,15 @@ extern struct ida blk_queue_ida;
 static inline struct blk_flush_queue *blk_get_flush_queue(
 		struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-	return q->fq;
+	struct blk_mq_hw_ctx *hctx;
+
+	if (!q->mq_ops)
+		return q->fq;
+	WARN_ON(!ctx);
+
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	return hctx->fq;
 }
 
 static inline void __blk_get_queue(struct request_queue *q)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f2..1f3c523 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 
 struct blk_mq_tags;
+struct blk_flush_queue;
 
 struct blk_mq_cpu_notifier {
 	struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
 
 	struct request_queue	*queue;
 	unsigned int		queue_num;
+	struct blk_flush_queue	*fq;
 
 	void			*driver_data;
 
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/8] block: per-distpatch_queue flush machinery
  2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
                   ` (7 preceding siblings ...)
  2014-09-09 13:05 ` [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery Ming Lei
@ 2014-09-09 15:20 ` Christoph Hellwig
  2014-09-09 15:38   ` Ming Lei
  8 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 15:20 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi

Hi Ming,

thanks for doing this work!

I've only taken a very cursory look at the series and I like most of it.

But can you explain why you're not simply incrementing the number of
allocated requests in the blk-mq request allocation code instead of
allocating the flush request separately in the last patch?

(A more throughout review will follow too)


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/8] block: per-distpatch_queue flush machinery
  2014-09-09 15:20 ` [PATCH 0/8] block: " Christoph Hellwig
@ 2014-09-09 15:38   ` Ming Lei
  0 siblings, 0 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-09 15:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List

On Tue, Sep 9, 2014 at 11:20 PM, Christoph Hellwig <hch@lst.de> wrote:
> Hi Ming,
>
> thanks for doing this work!
>
> I've only taken a very cursory look at the series and I like most of it.
>
> But can you explain why you're not simply incrementing the number of
> allocated requests in the blk-mq request allocation code instead of
> allocating the flush request separately in the last patch?

Requests in tag_set may be shared by more than one queue, but the flush
rq per hctx can't be shared by request queues at all.

Also it is simple to put it inside the introduced blk_flush_queue.

Thanks,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush()
  2014-09-09 13:05 ` [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
@ 2014-09-09 18:35   ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:35 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

On Tue, Sep 09, 2014 at 09:05:42PM +0800, Ming Lei wrote:
> It is reasonable to allocate flush req in blk_mq_init_flush().
> 
> Signed-off-by: Ming Lei <ming.lei@canonical.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/8] block: introduce blk_init_flush and its pair
  2014-09-09 13:05 ` [PATCH 2/8] block: introduce blk_init_flush and its pair Ming Lei
@ 2014-09-09 18:36   ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:36 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

On Tue, Sep 09, 2014 at 09:05:43PM +0800, Ming Lei wrote:
> These two functions are introduced to initialize and de-initialize
> flush stuff centrally.
> 
> Signed-off-by: Ming Lei <ming.lei@canonical.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/8] block: move flush initialized stuff to blk_flush_init
  2014-09-09 13:05 ` [PATCH 3/8] block: move flush initialized stuff to blk_flush_init Ming Lei
@ 2014-09-09 18:37   ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:37 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

On Tue, Sep 09, 2014 at 09:05:44PM +0800, Ming Lei wrote:
> These stuff is always used with flush req together, so
> we can do that safely.

Little wording nitpick:

in the subject s/initialized stuff/initialization/

and in the body:

These fields are always used with the flush request, so initialize them
together.

Otherwise looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] block: avoid to use q->flush_rq directly
  2014-09-09 13:05 ` [PATCH 4/8] block: avoid to use q->flush_rq directly Ming Lei
@ 2014-09-09 18:38   ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:38 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

On Tue, Sep 09, 2014 at 09:05:45PM +0800, Ming Lei wrote:
> This patch tries to use a local variable to access the flush request,
> so that we can convert to per-queue flush machinery a bit easier.
> 
> Signed-off-by: Ming Lei <ming.lei@canonical.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery
  2014-09-09 13:05 ` [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery Ming Lei
@ 2014-09-09 18:43   ` Christoph Hellwig
  2014-09-09 18:55     ` Jens Axboe
  2014-09-10  1:23     ` Ming Lei
  0 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:43 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

On Tue, Sep 09, 2014 at 09:05:46PM +0800, Ming Lei wrote:
> This patch introduces 'struct blk_flush_queue' and puts all
> flush machinery related stuff into this strcuture, so that

s/stuff/fields/
s/strcuture/structure/

Looks good, but a few more nitpicks below.

Reviewed-by: Christoph Hellwig <hch@lst.de>

> +int blk_init_flush(struct request_queue *q)
> +{
> +	int ret;
> +	struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
>  
> +	if (!fq)
>  		return -ENOMEM;
>  
> +	q->fq = fq;

I think it would be cleaner to return the flush data structure and
assign it in the caller.

> +	INIT_LIST_HEAD(&fq->flush_queue[0]);
> +	INIT_LIST_HEAD(&fq->flush_queue[1]);
> +	INIT_LIST_HEAD(&fq->flush_data_in_flight);
> +
> +	if (q->mq_ops) {
> +		ret = blk_mq_init_flush(q);

I think we can just remove blk_mq_init_flush now that it's only
called in blk-flush.c anyway.

>  void blk_exit_flush(struct request_queue *q)
>  {
> +	if (q->mq_ops)
> +		blk_mq_exit_flush(q);
> +	else {
> +		struct blk_flush_queue *fq = blk_get_flush_queue(q);
> +		kfree(fq->flush_rq);
> +		kfree(fq);
> +	}

Similarly I would pass the flush structure here.

> +struct blk_flush_queue {
> +	unsigned int		flush_queue_delayed:1;
> +	unsigned int		flush_pending_idx:1;
> +	unsigned int		flush_running_idx:1;
> +	unsigned long		flush_pending_since;
> +	struct list_head	flush_queue[2];
> +	struct list_head	flush_data_in_flight;
> +	struct request		*flush_rq;
> +	spinlock_t		mq_flush_lock;
> +};

As this isn't really a queue I would call it blk_flush_data.

> +static inline struct blk_flush_queue *blk_get_flush_queue(
> +		struct request_queue *q)
> +{
> +	return q->fq;
> +}

I don't think there is a need for this helper.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily
  2014-09-09 13:05 ` [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily Ming Lei
@ 2014-09-09 18:44   ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:44 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery
  2014-09-09 13:05 ` [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery Ming Lei
@ 2014-09-09 18:48   ` Christoph Hellwig
  2014-09-10  1:40     ` Ming Lei
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-09 18:48 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-kernel, linux-scsi, Christoph Hellwig

> +	if (hctx) {
> +		int cmd_sz = q->tag_set->cmd_size;
> +		int node = hctx->numa_node;
> +
> +		fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
> +		if (!fq)
> +			goto failed;
> +
> +		rq_sz = round_up(rq_sz + cmd_sz, cache_line_size());
> +		fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
> +		if (!fq->flush_rq)
> +			goto rq_failed;
> +
> +		spin_lock_init(&fq->mq_flush_lock);
> +	} else {
> +		fq = kzalloc(sizeof(*fq), GFP_KERNEL);
> +		if (!fq)
> +			goto failed;
> +
> +		fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
> +		if (!fq->flush_rq)
> +			goto rq_failed;
> +	}

Seems like this would be a lot cleaner by passing the cmd_size and
node_id explicitly.  The added benefit would be that we could also
pass the node for the blk_init_queue_node() case.

> +static void __blk_mq_exit_flush(struct request_queue *q,
> +		unsigned free_end, unsigned int exit_end)
> +{
> +	struct blk_mq_hw_ctx *hctx;
> +	unsigned int k;
> +	struct blk_flush_queue *fq;
> +	struct blk_mq_tag_set *set = q->tag_set;
> +	unsigned start_idx = set->queue_depth;
> +
> +	queue_for_each_hw_ctx(q, hctx, k) {
> +		if (k >= free_end)
> +			break;
> +
> +		fq = hctx->fq;
> +		if (k < exit_end && set->ops->exit_request)
> +			set->ops->exit_request(set->driver_data,
> +					fq->flush_rq, k,
> +					start_idx + k);
> +
> +		blk_free_flush_queue(fq);
> +	}

Can we merge the mq init/exit case into some existing for each hctx
loop in blk-mq?


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery
  2014-09-09 18:43   ` Christoph Hellwig
@ 2014-09-09 18:55     ` Jens Axboe
  2014-09-10  1:25       ` Ming Lei
  2014-09-10  1:23     ` Ming Lei
  1 sibling, 1 reply; 24+ messages in thread
From: Jens Axboe @ 2014-09-09 18:55 UTC (permalink / raw)
  To: Christoph Hellwig, Ming Lei; +Cc: linux-kernel, linux-scsi, Christoph Hellwig

On 09/09/2014 12:43 PM, Christoph Hellwig wrote:
> On Tue, Sep 09, 2014 at 09:05:46PM +0800, Ming Lei wrote:
>> This patch introduces 'struct blk_flush_queue' and puts all
>> flush machinery related stuff into this strcuture, so that
> 
> s/stuff/fields/
> s/strcuture/structure/
> 
> Looks good, but a few more nitpicks below.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> 
>> +int blk_init_flush(struct request_queue *q)
>> +{
>> +	int ret;
>> +	struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
>>  
>> +	if (!fq)
>>  		return -ENOMEM;
>>  
>> +	q->fq = fq;
> 
> I think it would be cleaner to return the flush data structure and
> assign it in the caller.

I was going to suggest renaming because of this as well. If we do this:

	q->fq = blk_init_flush(q);

then it's immediately clear what it does, whereas blk_init_flush(q)
means very little on its own. I'd change the naming to
blk_alloc_flush_queue() and blk_free_flush_queue().

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery
  2014-09-09 18:43   ` Christoph Hellwig
  2014-09-09 18:55     ` Jens Axboe
@ 2014-09-10  1:23     ` Ming Lei
  1 sibling, 0 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-10  1:23 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List,
	Christoph Hellwig

On Wed, Sep 10, 2014 at 2:43 AM, Christoph Hellwig <hch@infradead.org> wrote:
> On Tue, Sep 09, 2014 at 09:05:46PM +0800, Ming Lei wrote:
>> This patch introduces 'struct blk_flush_queue' and puts all
>> flush machinery related stuff into this strcuture, so that
>
> s/stuff/fields/
> s/strcuture/structure/
>
> Looks good, but a few more nitpicks below.
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
>
>> +int blk_init_flush(struct request_queue *q)
>> +{
>> +     int ret;
>> +     struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
>>
>> +     if (!fq)
>>               return -ENOMEM;
>>
>> +     q->fq = fq;
>
> I think it would be cleaner to return the flush data structure and
> assign it in the caller.

OK, and most of this part is transitional.

>
>> +     INIT_LIST_HEAD(&fq->flush_queue[0]);
>> +     INIT_LIST_HEAD(&fq->flush_queue[1]);
>> +     INIT_LIST_HEAD(&fq->flush_data_in_flight);
>> +
>> +     if (q->mq_ops) {
>> +             ret = blk_mq_init_flush(q);
>
> I think we can just remove blk_mq_init_flush now that it's only
> called in blk-flush.c anyway.

blk_mq_init_flush() will become bigger in following patch.

>>  void blk_exit_flush(struct request_queue *q)
>>  {
>> +     if (q->mq_ops)
>> +             blk_mq_exit_flush(q);
>> +     else {
>> +             struct blk_flush_queue *fq = blk_get_flush_queue(q);
>> +             kfree(fq->flush_rq);
>> +             kfree(fq);
>> +     }
>
> Similarly I would pass the flush structure here.

OK.

>
>> +struct blk_flush_queue {
>> +     unsigned int            flush_queue_delayed:1;
>> +     unsigned int            flush_pending_idx:1;
>> +     unsigned int            flush_running_idx:1;
>> +     unsigned long           flush_pending_since;
>> +     struct list_head        flush_queue[2];
>> +     struct list_head        flush_data_in_flight;
>> +     struct request          *flush_rq;
>> +     spinlock_t              mq_flush_lock;
>> +};
>
> As this isn't really a queue I would call it blk_flush_data.

It is sort of a queue since there is a double buffer flush queue.

>
>> +static inline struct blk_flush_queue *blk_get_flush_queue(
>> +             struct request_queue *q)
>> +{
>> +     return q->fq;
>> +}
>
> I don't think there is a need for this helper.

No, the helper can simplify the following patch a lot since
the flush queue data is always obtained from this helper in both
the legacy and mq cases, which will use a per-hw_queue flush queue.

Thanks,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery
  2014-09-09 18:55     ` Jens Axboe
@ 2014-09-10  1:25       ` Ming Lei
  0 siblings, 0 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-10  1:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Linux Kernel Mailing List, Linux SCSI List,
	Christoph Hellwig

On Wed, Sep 10, 2014 at 2:55 AM, Jens Axboe <axboe@kernel.dk> wrote:
> On 09/09/2014 12:43 PM, Christoph Hellwig wrote:
>> On Tue, Sep 09, 2014 at 09:05:46PM +0800, Ming Lei wrote:
>>> This patch introduces 'struct blk_flush_queue' and puts all
>>> flush machinery related stuff into this strcuture, so that
>>
>> s/stuff/fields/
>> s/strcuture/structure/
>>
>> Looks good, but a few more nitpicks below.
>>
>> Reviewed-by: Christoph Hellwig <hch@lst.de>
>>
>>> +int blk_init_flush(struct request_queue *q)
>>> +{
>>> +    int ret;
>>> +    struct blk_flush_queue *fq = kzalloc(sizeof(*fq), GFP_KERNEL);
>>>
>>> +    if (!fq)
>>>              return -ENOMEM;
>>>
>>> +    q->fq = fq;
>>
>> I think it would be cleaner to return the flush data structure and
>> assign it in the caller.
>
> I was going to suggest renaming because of this as well. If we do this:
>
>         q->fq = blk_init_flush(q);
>
> then it's immediately clear what it does, whereas blk_init_flush(q)
> means very little on its own. I'd change the naming to
> blk_alloc_flush_queue() and blk_free_flush_queue().

Exactly these two functions are introduced in patch 8/8, and I
will try to name them earlier in V1.

Thanks,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery
  2014-09-09 18:48   ` Christoph Hellwig
@ 2014-09-10  1:40     ` Ming Lei
  2014-09-10 19:02       ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: Ming Lei @ 2014-09-10  1:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List,
	Christoph Hellwig

On Wed, Sep 10, 2014 at 2:48 AM, Christoph Hellwig <hch@infradead.org> wrote:
>> +     if (hctx) {
>> +             int cmd_sz = q->tag_set->cmd_size;
>> +             int node = hctx->numa_node;
>> +
>> +             fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
>> +             if (!fq)
>> +                     goto failed;
>> +
>> +             rq_sz = round_up(rq_sz + cmd_sz, cache_line_size());
>> +             fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
>> +             if (!fq->flush_rq)
>> +                     goto rq_failed;
>> +
>> +             spin_lock_init(&fq->mq_flush_lock);
>> +     } else {
>> +             fq = kzalloc(sizeof(*fq), GFP_KERNEL);
>> +             if (!fq)
>> +                     goto failed;
>> +
>> +             fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
>> +             if (!fq->flush_rq)
>> +                     goto rq_failed;
>> +     }
>
> Seems like this would be a lot cleaner by passing the cmd_size and
> node_id explicitly.  The added benefit would be that we could also
> pass the node for the blk_init_queue_node() case.

OK.

>
>> +static void __blk_mq_exit_flush(struct request_queue *q,
>> +             unsigned free_end, unsigned int exit_end)
>> +{
>> +     struct blk_mq_hw_ctx *hctx;
>> +     unsigned int k;
>> +     struct blk_flush_queue *fq;
>> +     struct blk_mq_tag_set *set = q->tag_set;
>> +     unsigned start_idx = set->queue_depth;
>> +
>> +     queue_for_each_hw_ctx(q, hctx, k) {
>> +             if (k >= free_end)
>> +                     break;
>> +
>> +             fq = hctx->fq;
>> +             if (k < exit_end && set->ops->exit_request)
>> +                     set->ops->exit_request(set->driver_data,
>> +                                     fq->flush_rq, k,
>> +                                     start_idx + k);
>> +
>> +             blk_free_flush_queue(fq);
>> +     }
>
> Can we merge the mq init/exit case into some existing for each hctx
> loop in blk-mq?

I am wondering if we can do that because the lifetime is totally different
between flush requests and tag_set requests, which are initialized
before the request queue is created.

Thanks,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery
  2014-09-10  1:40     ` Ming Lei
@ 2014-09-10 19:02       ` Christoph Hellwig
  2014-09-10 23:53         ` Ming Lei
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2014-09-10 19:02 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
	Linux SCSI List, Christoph Hellwig

On Wed, Sep 10, 2014 at 09:40:11AM +0800, Ming Lei wrote:
> I am wondering we can do that because lifetime is totally different
> between flush requests and tag_set requests which are initialized
> before request queue is created.

We shouldn't do it in the tag sets, but where we allocate and free
each hctx: blk_mq_init_queue and blk_mq_free_hw_queues.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery
  2014-09-10 19:02       ` Christoph Hellwig
@ 2014-09-10 23:53         ` Ming Lei
  0 siblings, 0 replies; 24+ messages in thread
From: Ming Lei @ 2014-09-10 23:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Linux Kernel Mailing List, Linux SCSI List,
	Christoph Hellwig

On Thu, Sep 11, 2014 at 3:02 AM, Christoph Hellwig <hch@infradead.org> wrote:
> On Wed, Sep 10, 2014 at 09:40:11AM +0800, Ming Lei wrote:
>> I am wondering we can do that because lifetime is totally different
>> between flush requests and tag_set requests which are initialized
>> before request queue is created.
>
> We shouldn't do it in the tag sets, but where we allocate and free
> each hctx: blk_mq_init_queue and blk_mq_free_hw_queues.

That should work, but both the flush queue's allocation and .init_request()
have to move into that function, because hctx->numa_node is basically
ready in blk_mq_init_queue().  Then blk_init_flush() only needs to allocate
the data for the legacy case.


Thanks,

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2014-09-10 23:53 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-09 13:05 [PATCH 0/8] block: per-distpatch_queue flush machinery Ming Lei
2014-09-09 13:05 ` [PATCH 1/8] blk-mq: allocate flush_rq in blk_mq_init_flush() Ming Lei
2014-09-09 18:35   ` Christoph Hellwig
2014-09-09 13:05 ` [PATCH 2/8] block: introduce blk_init_flush and its pair Ming Lei
2014-09-09 18:36   ` Christoph Hellwig
2014-09-09 13:05 ` [PATCH 3/8] block: move flush initialized stuff to blk_flush_init Ming Lei
2014-09-09 18:37   ` Christoph Hellwig
2014-09-09 13:05 ` [PATCH 4/8] block: avoid to use q->flush_rq directly Ming Lei
2014-09-09 18:38   ` Christoph Hellwig
2014-09-09 13:05 ` [PATCH 5/8] block: introduce blk_flush_queue to drive flush machinery Ming Lei
2014-09-09 18:43   ` Christoph Hellwig
2014-09-09 18:55     ` Jens Axboe
2014-09-10  1:25       ` Ming Lei
2014-09-10  1:23     ` Ming Lei
2014-09-09 13:05 ` [PATCH 6/8] block: flush: avoid to figure out flush queue unnecessarily Ming Lei
2014-09-09 18:44   ` Christoph Hellwig
2014-09-09 13:05 ` [PATCH 7/8] block: introduce 'blk_mq_ctx' parameter to blk_get_flush_queue Ming Lei
2014-09-09 13:05 ` [PATCH 8/8] blk-mq: support per-distpatch_queue flush machinery Ming Lei
2014-09-09 18:48   ` Christoph Hellwig
2014-09-10  1:40     ` Ming Lei
2014-09-10 19:02       ` Christoph Hellwig
2014-09-10 23:53         ` Ming Lei
2014-09-09 15:20 ` [PATCH 0/8] block: " Christoph Hellwig
2014-09-09 15:38   ` Ming Lei

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.