* [PATCH v3 0/4] blk-mq: support to use hw tag for scheduling
@ 2017-05-10 15:55 Ming Lei
  2017-05-10 15:55 ` [PATCH v3 1/4] blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG Ming Lei
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Ming Lei @ 2017-05-10 15:55 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei

Hi,

This patchset introduces the flag BLK_MQ_F_SCHED_USE_HW_TAG and
allows the hardware tag to be used directly for I/O scheduling when
the queue depth is big enough. In this way, we avoid allocating an
extra tag set and request pool for the I/O scheduler, and the
scheduler tag allocation/release is saved in the I/O submit path.
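
As a reading aid, a minimal standalone sketch of the gating heuristic
the series ends up with is shown below; the function name and the
depths used here are illustrative only and are not taken from the
patches or from any driver:

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative model of the decision made in patches 3-4: the hw tag
 * space can double as the scheduler tag space only if each queue
 * sharing the tag set still sees at least q->nr_requests tags.
 */
static bool may_use_hw_tag(unsigned int hw_depth, unsigned int nr_shared,
			   unsigned int nr_requests)
{
	return hw_depth / nr_shared >= nr_requests;
}

int main(void)
{
	/* q->nr_requests defaults to 2 * BLKDEV_MAX_RQ = 256 here */
	printf("%d\n", may_use_hw_tag(1024, 1, 256)); /* 1: use hw tags */
	printf("%d\n", may_use_hw_tag(62, 2, 256));   /* 0: keep sched tags */
	return 0;
}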

V3:
	- respect q->nr_requests by resizing hw tags, as suggested by Omar

V2:
	- fix oops when kyber is used
	- move dumping the new flag into patch 1
	- support using hw tag for shared tags
	- update hctx->sched_tag when BLK_MQ_F_SCHED_USE_HW_TAG is changed
	- clear the flag in blk_mq_exit_sched()
	- don't update q->nr_requests when updating hw queue's depth
	- fix blk_mq_get_queue_depth()

Ming Lei (4):
  blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG
  blk-mq: introduce blk_mq_get_queue_depth()
  blk-mq: use hw tag for scheduling if hw tag space is big enough
  blk-mq: allow to use hw tag for shared tags

 block/blk-mq-debugfs.c |   1 +
 block/blk-mq-sched.c   |  90 ++++++++++++++++++++++++++++++++++++++++----
 block/blk-mq-sched.h   |   9 +++++
 block/blk-mq.c         | 100 +++++++++++++++++++++++++++++++++++++++++++++----
 block/blk-mq.h         |  24 ++++++++++++
 block/kyber-iosched.c  |   7 +++-
 include/linux/blk-mq.h |   1 +
 include/linux/blkdev.h |   8 ++++
 8 files changed, 223 insertions(+), 17 deletions(-)

-- 
2.9.3

* [PATCH v3 1/4] blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG
  2017-05-10 15:55 [PATCH v3 0/4] blk-mq: support to use hw tag for scheduling Ming Lei
@ 2017-05-10 15:55 ` Ming Lei
  2017-05-10 15:55 ` [PATCH v3 2/4] blk-mq: introduce blk_mq_get_queue_depth() Ming Lei
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Ming Lei @ 2017-05-10 15:55 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei

When a blk-mq I/O scheduler is used, we need two tags to submit one
request. One is the scheduler tag, used for allocating the request
and scheduling I/O; the other is the driver tag, used for dispatching
the request to the hardware/driver. This introduces one extra
per-queue allocation for both the tags and the request pool, and may
not be as efficient as the case of no scheduler.

Also, we currently put a default per-hctx limit on schedulable
requests, and this limit may become a bottleneck for some devices,
especially those with a quite large tag space.

This patch introduces BLK_MQ_F_SCHED_USE_HW_TAG so that
hardware/driver tags can be used directly for I/O scheduling when the
device's hardware tag space is big enough. Then we can avoid the
extra resource allocation and make I/O submission more efficient.
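
For reference, the effect on the two tag fields of struct request can
be summarized as follows (this only restates the hunks below; with the
new flag the separate scheduler-tag release at request completion is
skipped as well, since no scheduler tag was ever allocated):

                        scheduler w/o flag        with SCHED_USE_HW_TAG
  at allocation:        internal_tag = sched tag  internal_tag = hw tag
                        tag = -1                  tag = -1
  at dispatch:          tag = newly acquired      tag = internal_tag
                        hw tag                    (reused, no new tag)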

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq-debugfs.c |  1 +
 block/blk-mq-sched.c   | 10 +++++++++-
 block/blk-mq.c         | 35 +++++++++++++++++++++++++++++------
 block/kyber-iosched.c  |  7 ++++++-
 include/linux/blk-mq.h |  1 +
 5 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4d7221..c01edf822a66 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -184,6 +184,7 @@ static const char *const hctx_flag_name[] = {
 	HCTX_FLAG_NAME(SG_MERGE),
 	HCTX_FLAG_NAME(BLOCKING),
 	HCTX_FLAG_NAME(NO_SCHED),
+	HCTX_FLAG_NAME(SCHED_USE_HW_TAG),
 };
 #undef HCTX_FLAG_NAME
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..2c5981ff9e04 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -84,7 +84,12 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
 	if (e) {
-		data->flags |= BLK_MQ_REQ_INTERNAL;
+		/*
+		 * If BLK_MQ_F_SCHED_USE_HW_TAG is set, we use hardware
+		 * tag for IO scheduler directly.
+		 */
+		if (!(data->hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG))
+			data->flags |= BLK_MQ_REQ_INTERNAL;
 
 		/*
 		 * Flush requests are special and go directly to the
@@ -430,6 +435,9 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 	struct blk_mq_tag_set *set = q->tag_set;
 	int ret;
 
+	if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG)
+		return 0;
+
 	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
 					       set->reserved_tags);
 	if (!hctx->sched_tags)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbf..50b968fa4922 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -265,9 +265,19 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 				rq->rq_flags = RQF_MQ_INFLIGHT;
 				atomic_inc(&data->hctx->nr_active);
 			}
-			rq->tag = tag;
-			rq->internal_tag = -1;
-			data->hctx->tags->rqs[rq->tag] = rq;
+			data->hctx->tags->rqs[tag] = rq;
+
+			/*
+			 * If we use hw tag for scheduling, postpone setting
+			 * rq->tag in blk_mq_get_driver_tag().
+			 */
+			if (data->hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+				rq->tag = -1;
+				rq->internal_tag = tag;
+			} else {
+				rq->tag = tag;
+				rq->internal_tag = -1;
+			}
 		}
 
 		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
@@ -367,7 +377,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
 	if (rq->tag != -1)
 		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
-	if (sched_tag != -1)
+	if (sched_tag != -1 && !(hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG))
 		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
 	blk_mq_sched_restart(hctx);
 	blk_queue_exit(q);
@@ -871,6 +881,12 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 	if (rq->tag != -1)
 		goto done;
 
+	/* we buffered driver tag in rq->internal_tag */
+	if (data.hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+		rq->tag = rq->internal_tag;
+		goto done;
+	}
+
 	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
 		data.flags |= BLK_MQ_REQ_RESERVED;
 
@@ -892,9 +908,15 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
 				    struct request *rq)
 {
-	blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+	unsigned int tag = rq->tag;
+
 	rq->tag = -1;
 
+	if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG)
+		return;
+
+	blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, tag);
+
 	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
 		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
 		atomic_dec(&hctx->nr_active);
@@ -2869,7 +2891,8 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 		blk_flush_plug_list(plug, false);
 
 	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-	if (!blk_qc_t_is_internal(cookie))
+	if (!blk_qc_t_is_internal(cookie) || (hctx->flags &
+			BLK_MQ_F_SCHED_USE_HW_TAG))
 		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
 	else {
 		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..603c52682311 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -276,8 +276,13 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
 	/*
 	 * All of the hardware queues have the same depth, so we can just grab
 	 * the shift of the first one.
+	 *
+	 * Hardware tags may be used for scheduling.
 	 */
-	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
+	if (kqd->q->queue_hw_ctx[0]->sched_tags)
+		return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
+	else
+		return kqd->q->queue_hw_ctx[0]->tags->bitmap_tags.sb.shift;
 }
 
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa248c640..ae1faa0d8a95 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -164,6 +164,7 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_BLOCKING	= 1 << 5,
 	BLK_MQ_F_NO_SCHED	= 1 << 6,
+	BLK_MQ_F_SCHED_USE_HW_TAG	= 1 << 7,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
-- 
2.9.3

* [PATCH v3 2/4] blk-mq: introduce blk_mq_get_queue_depth()
  2017-05-10 15:55 [PATCH v3 0/4] blk-mq: support to use hw tag for scheduling Ming Lei
  2017-05-10 15:55 ` [PATCH v3 1/4] blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG Ming Lei
@ 2017-05-10 15:55 ` Ming Lei
  2017-05-10 15:55 ` [PATCH v3 3/4] blk-mq: use hw tag for scheduling if hw tag space is big enough Ming Lei
  2017-05-10 15:55 ` [PATCH v3 4/4] blk-mq: allow to use hw tag for shared tags Ming Lei
  3 siblings, 0 replies; 5+ messages in thread
From: Ming Lei @ 2017-05-10 15:55 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei

The hardware queue depth can be resized via blk_mq_update_nr_requests(),
so introduce this helper for retrieving the queue's current depth easily.
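
A hedged usage sketch follows; the caller shown is hypothetical and
not part of this patch, only blk_mq_get_queue_depth() itself is
introduced here:

	bool use_hw_tag = false;

	/*
	 * Hypothetical caller: compare the current (resizable) hw queue
	 * depth against the soft limit before deciding to schedule on
	 * hw tags directly, as the next patch does.
	 */
	if (blk_mq_get_queue_depth(q) >= q->nr_requests)
		use_hw_tag = true;	/* hw tag space is big enough */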

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 15 +++++++++++++++
 block/blk-mq.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 50b968fa4922..1a61ca611fae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2135,6 +2135,21 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Queue depth can be changed via blk_mq_update_nr_requests(),
+ * so use this helper to retrieve queue's depth.
+ */
+int blk_mq_get_queue_depth(struct request_queue *q)
+{
+	/*
+	 * All queues have same queue depth, need to revisit
+	 * if per hw-queue depth is supported.
+	 */
+	struct blk_mq_tags	*tags = q->tag_set->tags[0];
+
+	return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
+}
+
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..d49d46de2923 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -138,6 +138,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 void blk_mq_finish_request(struct request *rq);
 struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 					unsigned int op);
+int blk_mq_get_queue_depth(struct request_queue *q);
 
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
 {
-- 
2.9.3

* [PATCH v3 3/4] blk-mq: use hw tag for scheduling if hw tag space is big enough
  2017-05-10 15:55 [PATCH v3 0/4] blk-mq: support to use hw tag for scheduling Ming Lei
  2017-05-10 15:55 ` [PATCH v3 1/4] blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG Ming Lei
  2017-05-10 15:55 ` [PATCH v3 2/4] blk-mq: introduce blk_mq_get_queue_depth() Ming Lei
@ 2017-05-10 15:55 ` Ming Lei
  2017-05-10 15:55 ` [PATCH v3 4/4] blk-mq: allow to use hw tag for shared tags Ming Lei
  3 siblings, 0 replies; 5+ messages in thread
From: Ming Lei @ 2017-05-10 15:55 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei

When a device's tag space is big enough, use the hw tag directly for
I/O scheduling.

For now the decision is made when the hw queue depth is not less than
q->nr_requests and the tag set isn't shared.
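
The elevator-init side of the change can be outlined as below; this is
a paraphrase of the blk_mq_init_sched() hunk that follows, not
additional code:

	/* pseudo-outline only, see the real hunk below */
	q->nr_requests = 2 * BLKDEV_MAX_RQ;
	if (blk_mq_sched_may_use_hw_tag(q)) {
		/* remember the original depth so it can be restored */
		q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
		/* resize every hctx->tags to respect q->nr_requests */
		blk_mq_set_queues_depth(q, q->nr_requests);
		/* flag each hctx; no sched_tags get allocated */
	} else {
		/* per-hctx sched_tags are allocated as before */
	}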

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq-sched.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++-----
 block/blk-mq-sched.h   |  8 +++++
 block/blk-mq.c         | 35 ++++++++++++++++++++--
 include/linux/blkdev.h |  8 +++++
 4 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2c5981ff9e04..a7e125a40e0a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -417,9 +417,9 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 	blk_mq_run_hw_queue(hctx, run_queue_async);
 }
 
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
-				   struct blk_mq_hw_ctx *hctx,
-				   unsigned int hctx_idx)
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+			    struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx)
 {
 	if (hctx->sched_tags) {
 		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
@@ -428,9 +428,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 	}
 }
 
-static int blk_mq_sched_alloc_tags(struct request_queue *q,
-				   struct blk_mq_hw_ctx *hctx,
-				   unsigned int hctx_idx)
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+			    struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 	int ret;
@@ -450,14 +450,52 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 	return ret;
 }
 
+static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
+				  unsigned int nr)
+{
+	if (!hctx->tags)
+		return -EINVAL;
+
+	return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
+}
+
+static int blk_mq_set_queues_depth(struct request_queue *q,
+				   unsigned int nr)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i, j, ret;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_set_queue_depth(hctx, nr);
+		if (ret)
+			goto recovery;
+	}
+	return 0;
+
+ recovery:
+	queue_for_each_hw_ctx(q, hctx, j) {
+		if (j >= i)
+			break;
+		blk_mq_tag_update_depth(hctx, &hctx->tags,
+					q->act_hw_queue_depth,
+					false);
+	}
+	return ret;
+}
+
 static void blk_mq_sched_tags_teardown(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i)
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+			blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+			hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+		}
 		blk_mq_sched_free_tags(set, hctx, i);
+	}
 }
 
 int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
@@ -504,12 +542,28 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
 }
 
+/*
+ * If this queue has enough hardware tags and doesn't share tags with
+ * other queues, just use hw tag directly for scheduling.
+ */
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
+{
+	if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
+		return false;
+
+	if (q->act_hw_queue_depth < q->nr_requests)
+		return false;
+
+	return true;
+}
+
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct elevator_queue *eq;
 	unsigned int i;
 	int ret;
+	bool auto_hw_tag;
 
 	if (!e) {
 		q->elevator = NULL;
@@ -522,7 +576,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	 */
 	q->nr_requests = 2 * BLKDEV_MAX_RQ;
 
+	auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+	if (auto_hw_tag) {
+		q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+		if (blk_mq_set_queues_depth(q, q->nr_requests))
+			auto_hw_tag = false;
+	}
+
 	queue_for_each_hw_ctx(q, hctx, i) {
+		if (auto_hw_tag)
+			hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+		else
+			hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+
 		ret = blk_mq_sched_alloc_tags(q, hctx, i);
 		if (ret)
 			goto err;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..bbfc1ea5fafa 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -25,6 +25,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
@@ -35,6 +36,13 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 
 int blk_mq_sched_init(struct request_queue *q);
 
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+			    struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx);
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+			    struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx);
+
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1a61ca611fae..e02fa8d078e6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2150,6 +2150,34 @@ int blk_mq_get_queue_depth(struct request_queue *q)
 	return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
 }
 
+static void blk_mq_update_sched_flag(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	if (!q->elevator)
+		return;
+
+	if (!blk_mq_sched_may_use_hw_tag(q))
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+				blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+				hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+			}
+			if (!hctx->sched_tags) {
+				if (blk_mq_sched_alloc_tags(q, hctx, i))
+					goto force_use_hw_tag;
+			}
+		}
+	else
+ force_use_hw_tag:
+		queue_for_each_hw_ctx(q, hctx, i) {
+			hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+			if (hctx->sched_tags)
+				blk_mq_sched_free_tags(q->tag_set, hctx, i);
+		}
+}
+
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -2366,7 +2394,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	/*
 	 * Do this after blk_queue_make_request() overrides it...
 	 */
-	q->nr_requests = set->queue_depth;
+	q->act_hw_queue_depth = q->nr_requests = set->queue_depth;
 
 	/*
 	 * Default to classic polling
@@ -2689,8 +2717,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 			break;
 	}
 
-	if (!ret)
+	if (!ret) {
 		q->nr_requests = nr;
+		q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+		blk_mq_update_sched_flag(q);
+	}
 
 	blk_mq_unfreeze_queue(q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5d1e27631ee..7389e388d583 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -428,6 +428,14 @@ struct request_queue {
 	unsigned int		nr_hw_queues;
 
 	/*
+	 * save active hw queue depth before using hw tag for scheduling,
+	 * this need to revisit if per hw queue depth is supported.
+	 *
+	 * Only used by blk-mq-sched.
+	 */
+	unsigned int		act_hw_queue_depth;
+
+	/*
 	 * Dispatch queue sorting
 	 */
 	sector_t		end_sector;
-- 
2.9.3

* [PATCH v3 4/4] blk-mq: allow to use hw tag for shared tags
  2017-05-10 15:55 [PATCH v3 0/4] blk-mq: support to use hw tag for scheduling Ming Lei
                   ` (2 preceding siblings ...)
  2017-05-10 15:55 ` [PATCH v3 3/4] blk-mq: use hw tag for scheduling if hw tag space is big enough Ming Lei
@ 2017-05-10 15:55 ` Ming Lei
  3 siblings, 0 replies; 5+ messages in thread
From: Ming Lei @ 2017-05-10 15:55 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei

In the case of shared tags, hctx_may_queue() limits the maximum
number of requests allocated to one hw queue to
.queue_depth / active_queues.

So allow using the hw tag for this case too, if
.queue_depth / shared_queues is not less than q->nr_requests.

This also covers some SCSI devices, such as virtio-scsi in its
default configuration.
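
As a hedged numeric illustration (the depth below is made up rather
than taken from any particular driver): a tag set with queue_depth 512
shared by two request queues leaves 512 / 2 = 256 tags per queue,
which meets the default q->nr_requests of 2 * BLKDEV_MAX_RQ (256), so
hw tags can be used for scheduling; with four sharing queues only 128
tags per queue remain and the scheduler falls back to separate sched
tags.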

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq-sched.c | 16 ++++++++--------
 block/blk-mq-sched.h |  1 +
 block/blk-mq.c       | 21 ++++++++++++++++++---
 block/blk-mq.h       | 23 +++++++++++++++++++++++
 4 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a7e125a40e0a..f2114eb3eebb 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -459,8 +459,7 @@ static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
 	return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
 }
 
-static int blk_mq_set_queues_depth(struct request_queue *q,
-				   unsigned int nr)
+int blk_mq_set_queues_depth(struct request_queue *q, unsigned int nr)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i, j, ret;
@@ -543,15 +542,14 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 }
 
 /*
- * If this queue has enough hardware tags and doesn't share tags with
- * other queues, just use hw tag directly for scheduling.
+ * If this queue has enough hardware tags, just use hw tag directly
+ * for scheduling.
  */
 bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
 {
-	if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
-		return false;
+	int nr_shared = blk_mq_get_shared_queues(q);
 
-	if (q->act_hw_queue_depth < q->nr_requests)
+	if ((q->act_hw_queue_depth / nr_shared) < q->nr_requests)
 		return false;
 
 	return true;
@@ -578,8 +576,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
 	if (auto_hw_tag) {
+		unsigned int nr_shared = blk_mq_get_shared_queues(q);
+
 		q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
-		if (blk_mq_set_queues_depth(q, q->nr_requests))
+		if (blk_mq_set_queues_depth(q, q->nr_requests * nr_shared))
 			auto_hw_tag = false;
 	}
 
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index bbfc1ea5fafa..6deca4f9e656 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -26,6 +26,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 
 bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
+int blk_mq_set_queues_depth(struct request_queue *q, unsigned int nr);
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e02fa8d078e6..401a04388ac9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2150,15 +2150,17 @@ int blk_mq_get_queue_depth(struct request_queue *q)
 	return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
 }
 
-static void blk_mq_update_sched_flag(struct request_queue *q)
+static bool blk_mq_update_sched_flag(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
+	bool use_hw_tag;
 
 	if (!q->elevator)
-		return;
+		return false;
 
-	if (!blk_mq_sched_may_use_hw_tag(q))
+	use_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+	if (!use_hw_tag)
 		queue_for_each_hw_ctx(q, hctx, i) {
 			if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
 				blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
@@ -2176,6 +2178,16 @@ static void blk_mq_update_sched_flag(struct request_queue *q)
 			if (hctx->sched_tags)
 				blk_mq_sched_free_tags(q->tag_set, hctx, i);
 		}
+	return use_hw_tag;
+}
+
+static void blk_mq_update_for_sched(struct request_queue *q)
+{
+	if (!blk_mq_update_sched_flag(q))
+		return;
+
+	blk_mq_set_queues_depth(q, q->nr_requests *
+				__blk_mq_get_shared_queues(q));
 }
 
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
@@ -2217,6 +2229,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		/* update existing queue */
 		blk_mq_update_tag_set_depth(set, false);
 	}
+
+	blk_mq_update_for_sched(q);
 	mutex_unlock(&set->tag_list_lock);
 
 	synchronize_rcu();
@@ -2239,6 +2253,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 		queue_set_hctx_shared(q, true);
 	list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
 
+	blk_mq_update_for_sched(q);
 	mutex_unlock(&set->tag_list_lock);
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d49d46de2923..3fd869bee744 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -150,4 +150,27 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
 	return hctx->nr_ctx && hctx->tags;
 }
 
+/* return how many queues shared tag set with me */
+static inline int __blk_mq_get_shared_queues(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	int nr = 0;
+
+	list_for_each_entry_rcu(q, &set->tag_list, tag_set_list)
+		nr++;
+	return nr;
+}
+
+static inline int blk_mq_get_shared_queues(struct request_queue *q)
+{
+	int nr = 0;
+	struct blk_mq_tag_set *set = q->tag_set;
+
+	mutex_lock(&set->tag_list_lock);
+	nr = __blk_mq_get_shared_queues(q);
+	mutex_unlock(&set->tag_list_lock);
+
+	return nr;
+}
+
 #endif
-- 
2.9.3
