* [PATCH v3 1/4] blk-mq: introduce BLK_MQ_F_SCHED_USE_HW_TAG
From: Ming Lei @ 2017-05-10 15:55 UTC
To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei
When a blk-mq I/O scheduler is used, we need two tags to
submit one request. One is the scheduler tag, used for
allocating the request and scheduling I/O; the other is the
driver tag, used for dispatching the I/O to the hardware/driver.
This scheme introduces an extra per-queue allocation for both
the tags and the request pool, and may not be as efficient as
the 'none' scheduler case.
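A condensed sketch of the two-tag flow (error handling omitted;
the real path runs through blk_mq_make_request()):

    /* submission: scheduler tag, taken from hctx->sched_tags */
    rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
    /* now rq->internal_tag is set and rq->tag == -1 */

    /* dispatch: driver tag, taken from hctx->tags */
    if (blk_mq_get_driver_tag(rq, &hctx, false))
        ret = q->mq_ops->queue_rq(hctx, &bd);   /* uses rq->tag */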
Also, we currently put a default per-hctx limit on schedulable
requests, and this limit may become a bottleneck for some devices,
especially ones with quite a big tag space.
This patch introduces BLK_MQ_F_SCHED_USE_HW_TAG so that
hardware/driver tags can be used directly for I/O scheduling when a
device's hardware tag space is big enough. This avoids the extra
resource allocation and makes I/O submission more efficient.
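For illustration only, a hypothetical driver with a deep hardware
queue could opt in when setting up its tag set (a later patch in
this series selects the flag automatically, so this explicit opt-in
is just a sketch):

    struct blk_mq_tag_set *set = &dev->tag_set;

    set->queue_depth = 1024;    /* plenty of hw tags */
    set->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
    ret = blk_mq_alloc_tag_set(set);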
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq-debugfs.c | 1 +
block/blk-mq-sched.c | 10 +++++++++-
block/blk-mq.c | 35 +++++++++++++++++++++++++++++------
block/kyber-iosched.c | 7 ++++++-
include/linux/blk-mq.h | 1 +
5 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4d7221..c01edf822a66 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -184,6 +184,7 @@ static const char *const hctx_flag_name[] = {
HCTX_FLAG_NAME(SG_MERGE),
HCTX_FLAG_NAME(BLOCKING),
HCTX_FLAG_NAME(NO_SCHED),
+ HCTX_FLAG_NAME(SCHED_USE_HW_TAG),
};
#undef HCTX_FLAG_NAME
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..2c5981ff9e04 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -84,7 +84,12 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
if (e) {
- data->flags |= BLK_MQ_REQ_INTERNAL;
+ /*
+ * If BLK_MQ_F_SCHED_USE_HW_TAG is set, use the hardware
+ * tag directly for the I/O scheduler.
+ */
+ if (!(data->hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG))
+ data->flags |= BLK_MQ_REQ_INTERNAL;
/*
* Flush requests are special and go directly to the
@@ -430,6 +435,9 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_tag_set *set = q->tag_set;
int ret;
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG)
+ return 0;
+
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
set->reserved_tags);
if (!hctx->sched_tags)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbf..50b968fa4922 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -265,9 +265,19 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
rq->rq_flags = RQF_MQ_INFLIGHT;
atomic_inc(&data->hctx->nr_active);
}
- rq->tag = tag;
- rq->internal_tag = -1;
- data->hctx->tags->rqs[rq->tag] = rq;
+ data->hctx->tags->rqs[tag] = rq;
+
+ /*
+ * If we use the hw tag for scheduling, postpone setting
+ * rq->tag until blk_mq_get_driver_tag().
+ */
+ if (data->hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ rq->tag = -1;
+ rq->internal_tag = tag;
+ } else {
+ rq->tag = tag;
+ rq->internal_tag = -1;
+ }
}
blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
@@ -367,7 +377,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
- if (sched_tag != -1)
+ if (sched_tag != -1 && !(hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG))
blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
@@ -871,6 +881,12 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
if (rq->tag != -1)
goto done;
+ /* we buffered the driver tag in rq->internal_tag */
+ if (data.hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ rq->tag = rq->internal_tag;
+ goto done;
+ }
+
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
data.flags |= BLK_MQ_REQ_RESERVED;
@@ -892,9 +908,15 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+ unsigned int tag = rq->tag;
+
rq->tag = -1;
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG)
+ return;
+
+ blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, tag);
+
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
rq->rq_flags &= ~RQF_MQ_INFLIGHT;
atomic_dec(&hctx->nr_active);
@@ -2869,7 +2891,8 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
blk_flush_plug_list(plug, false);
hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
- if (!blk_qc_t_is_internal(cookie))
+ if (!blk_qc_t_is_internal(cookie) || (hctx->flags &
+ BLK_MQ_F_SCHED_USE_HW_TAG))
rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
else {
rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..603c52682311 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -276,8 +276,13 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
/*
* All of the hardware queues have the same depth, so we can just grab
* the shift of the first one.
+ *
+ * Hardware tags may be used for scheduling; then sched_tags is NULL.
*/
- return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
+ if (kqd->q->queue_hw_ctx[0]->sched_tags)
+ return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
+ else
+ return kqd->q->queue_hw_ctx[0]->tags->bitmap_tags.sb.shift;
}
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa248c640..ae1faa0d8a95 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -164,6 +164,7 @@ enum {
BLK_MQ_F_SG_MERGE = 1 << 2,
BLK_MQ_F_BLOCKING = 1 << 5,
BLK_MQ_F_NO_SCHED = 1 << 6,
+ BLK_MQ_F_SCHED_USE_HW_TAG = 1 << 7,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
--
2.9.3
* [PATCH v3 2/4] blk-mq: introduce blk_mq_get_queue_depth()
From: Ming Lei @ 2017-05-10 15:55 UTC
To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei
The hardware queue depth can be resized via blk_mq_update_nr_requests(),
so introduce this helper for easily retrieving the queue's current depth.
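For example, the next patch in this series uses it roughly like this
when deciding whether hw tags can back the scheduler (condensed
sketch):

    /* can the hw tag space cover the scheduler's depth? */
    if (blk_mq_get_queue_depth(q) >= q->nr_requests)
        use_hw_tag = true;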
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 15 +++++++++++++++
block/blk-mq.h | 1 +
2 files changed, 16 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 50b968fa4922..1a61ca611fae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2135,6 +2135,21 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
}
+/*
+ * Queue depth can be changed via blk_mq_update_nr_requests(),
+ * so use this helper to retrieve the queue's depth.
+ */
+int blk_mq_get_queue_depth(struct request_queue *q)
+{
+ /*
+ * All queues have the same depth; this needs revisiting
+ * if per-hw-queue depth is supported.
+ */
+ struct blk_mq_tags *tags = q->tag_set->tags[0];
+
+ return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
+}
+
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..d49d46de2923 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -138,6 +138,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
void blk_mq_finish_request(struct request *rq);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
unsigned int op);
+int blk_mq_get_queue_depth(struct request_queue *q);
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
--
2.9.3
* [PATCH v3 3/4] blk-mq: use hw tag for scheduling if hw tag space is big enough
From: Ming Lei @ 2017-05-10 15:55 UTC
To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei
When a device's tag space is big enough, use hw tags directly
for I/O scheduling.
For now the decision is made if the hw queue depth is not less
than q->nr_requests and the tag set isn't shared.
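Condensed, the check added below amounts to (see
blk_mq_sched_may_use_hw_tag()):

    use_hw_tag = !(q->tag_set->flags & BLK_MQ_F_TAG_SHARED) &&
                 q->act_hw_queue_depth >= q->nr_requests;

For example, with the default q->nr_requests of 2 * BLKDEV_MAX_RQ
(256), a hypothetical device exposing 1024 hw tags per queue would
qualify, while a 32-tag device would keep using sched_tags.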
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq-sched.c | 80 +++++++++++++++++++++++++++++++++++++++++++++-----
block/blk-mq-sched.h | 8 +++++
block/blk-mq.c | 35 ++++++++++++++++++++--
include/linux/blkdev.h | 8 +++++
4 files changed, 122 insertions(+), 9 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2c5981ff9e04..a7e125a40e0a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -417,9 +417,9 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
blk_mq_run_hw_queue(hctx, run_queue_async);
}
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx)
{
if (hctx->sched_tags) {
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
@@ -428,9 +428,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
}
}
-static int blk_mq_sched_alloc_tags(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx)
{
struct blk_mq_tag_set *set = q->tag_set;
int ret;
@@ -450,14 +450,52 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
return ret;
}
+static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
+ unsigned int nr)
+{
+ if (!hctx->tags)
+ return -EINVAL;
+
+ return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
+}
+
+static int blk_mq_set_queues_depth(struct request_queue *q,
+ unsigned int nr)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i, j, ret;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ ret = blk_mq_set_queue_depth(hctx, nr);
+ if (ret)
+ goto recovery;
+ }
+ return 0;
+
+ recovery:
+ queue_for_each_hw_ctx(q, hctx, j) {
+ if (j >= i)
+ break;
+ blk_mq_tag_update_depth(hctx, &hctx->tags,
+ q->act_hw_queue_depth,
+ false);
+ }
+ return ret;
+}
+
static void blk_mq_sched_tags_teardown(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
int i;
- queue_for_each_hw_ctx(q, hctx, i)
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+ }
blk_mq_sched_free_tags(set, hctx, i);
+ }
}
int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
@@ -504,12 +542,28 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}
+/*
+ * If this queue has enough hardware tags and doesn't share tags with
+ * other queues, just use hw tag directly for scheduling.
+ */
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
+{
+ if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
+ return false;
+
+ if (q->act_hw_queue_depth < q->nr_requests)
+ return false;
+
+ return true;
+}
+
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
struct elevator_queue *eq;
unsigned int i;
int ret;
+ bool auto_hw_tag;
if (!e) {
q->elevator = NULL;
@@ -522,7 +576,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
*/
q->nr_requests = 2 * BLKDEV_MAX_RQ;
+ auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+ if (auto_hw_tag) {
+ q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+ if (blk_mq_set_queues_depth(q, q->nr_requests))
+ auto_hw_tag = false;
+ }
+
queue_for_each_hw_ctx(q, hctx, i) {
+ if (auto_hw_tag)
+ hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+ else
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+
ret = blk_mq_sched_alloc_tags(q, hctx, i);
if (ret)
goto err;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..bbfc1ea5fafa 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -25,6 +25,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
@@ -35,6 +36,13 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
int blk_mq_sched_init(struct request_queue *q);
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx);
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
+ unsigned int hctx_idx);
+
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1a61ca611fae..e02fa8d078e6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2150,6 +2150,34 @@ int blk_mq_get_queue_depth(struct request_queue *q)
return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
}
+static void blk_mq_update_sched_flag(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ if (!q->elevator)
+ return;
+
+ if (!blk_mq_sched_may_use_hw_tag(q))
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+ blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+ hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+ }
+ if (!hctx->sched_tags) {
+ if (blk_mq_sched_alloc_tags(q, hctx, i))
+ goto force_use_hw_tag;
+ }
+ }
+ else
+ force_use_hw_tag:
+ queue_for_each_hw_ctx(q, hctx, i) {
+ hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+ if (hctx->sched_tags)
+ blk_mq_sched_free_tags(q->tag_set, hctx, i);
+ }
+}
+
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
@@ -2366,7 +2394,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/*
* Do this after blk_queue_make_request() overrides it...
*/
- q->nr_requests = set->queue_depth;
+ q->act_hw_queue_depth = q->nr_requests = set->queue_depth;
/*
* Default to classic polling
@@ -2689,8 +2717,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
break;
}
- if (!ret)
+ if (!ret) {
q->nr_requests = nr;
+ q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+ blk_mq_update_sched_flag(q);
+ }
blk_mq_unfreeze_queue(q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5d1e27631ee..7389e388d583 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -428,6 +428,14 @@ struct request_queue {
unsigned int nr_hw_queues;
/*
+ * Save the active hw queue depth before hw tags are used for
+ * scheduling; this needs revisiting if per-hw-queue depth is supported.
+ *
+ * Only used by blk-mq-sched.
+ */
+ unsigned int act_hw_queue_depth;
+
+ /*
* Dispatch queue sorting
*/
sector_t end_sector;
--
2.9.3
* [PATCH v3 4/4] blk-mq: allow to use hw tag for shared tags
From: Ming Lei @ 2017-05-10 15:55 UTC
To: Jens Axboe, linux-block; +Cc: Bart Van Assche, Omar Sandoval, Ming Lei
In the case of shared tags, hctx_may_queue() limits the maximum
number of requests allocated to one hw queue to
.queue_depth / active_queues.
So allow using hw tags in this case too, provided
.queue_depth / shared_queues is not less than q->nr_requests.
This covers some SCSI devices as well, such as virtio-scsi in its
default configuration.
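Worked through with hypothetical numbers: a tag set with
.queue_depth = 512 shared by 2 request queues leaves each queue
512 / 2 = 256 tags, so with q->nr_requests = 256 the relaxed check
passes. Condensed from the updated blk_mq_sched_may_use_hw_tag():

    int nr_shared = blk_mq_get_shared_queues(q);

    use_hw_tag = (q->act_hw_queue_depth / nr_shared) >=
                 q->nr_requests;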
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq-sched.c | 16 ++++++++--------
block/blk-mq-sched.h | 1 +
block/blk-mq.c | 21 ++++++++++++++++++---
block/blk-mq.h | 23 +++++++++++++++++++++++
4 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a7e125a40e0a..f2114eb3eebb 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -459,8 +459,7 @@ static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
}
-static int blk_mq_set_queues_depth(struct request_queue *q,
- unsigned int nr)
+int blk_mq_set_queues_depth(struct request_queue *q, unsigned int nr)
{
struct blk_mq_hw_ctx *hctx;
int i, j, ret;
@@ -543,15 +542,14 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
}
/*
- * If this queue has enough hardware tags and doesn't share tags with
- * other queues, just use hw tag directly for scheduling.
+ * If this queue has enough hardware tags, just use hw tag directly
+ * for scheduling.
*/
bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
{
- if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
- return false;
+ int nr_shared = blk_mq_get_shared_queues(q);
- if (q->act_hw_queue_depth < q->nr_requests)
+ if ((q->act_hw_queue_depth / nr_shared) < q->nr_requests)
return false;
return true;
@@ -578,8 +576,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
if (auto_hw_tag) {
+ unsigned int nr_shared = blk_mq_get_shared_queues(q);
+
q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
- if (blk_mq_set_queues_depth(q, q->nr_requests))
+ if (blk_mq_set_queues_depth(q, q->nr_requests * nr_shared))
auto_hw_tag = false;
}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index bbfc1ea5fafa..6deca4f9e656 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -26,6 +26,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
+int blk_mq_set_queues_depth(struct request_queue *q, unsigned int nr);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e02fa8d078e6..401a04388ac9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2150,15 +2150,17 @@ int blk_mq_get_queue_depth(struct request_queue *q)
return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
}
-static void blk_mq_update_sched_flag(struct request_queue *q)
+static bool blk_mq_update_sched_flag(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
+ bool use_hw_tag;
if (!q->elevator)
- return;
+ return false;
- if (!blk_mq_sched_may_use_hw_tag(q))
+ use_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+ if (!use_hw_tag)
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
@@ -2176,6 +2178,16 @@ static void blk_mq_update_sched_flag(struct request_queue *q)
if (hctx->sched_tags)
blk_mq_sched_free_tags(q->tag_set, hctx, i);
}
+ return use_hw_tag;
+}
+
+static void blk_mq_update_for_sched(struct request_queue *q)
+{
+ if (!blk_mq_update_sched_flag(q))
+ return;
+
+ blk_mq_set_queues_depth(q, q->nr_requests *
+ __blk_mq_get_shared_queues(q));
}
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
@@ -2217,6 +2229,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
/* update existing queue */
blk_mq_update_tag_set_depth(set, false);
}
+
+ blk_mq_update_for_sched(q);
mutex_unlock(&set->tag_list_lock);
synchronize_rcu();
@@ -2239,6 +2253,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
queue_set_hctx_shared(q, true);
list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
+ blk_mq_update_for_sched(q);
mutex_unlock(&set->tag_list_lock);
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d49d46de2923..3fd869bee744 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -150,4 +150,27 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
return hctx->nr_ctx && hctx->tags;
}
+/* return how many queues share the tag set with us */
+static inline int __blk_mq_get_shared_queues(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+ int nr = 0;
+
+ list_for_each_entry_rcu(q, &set->tag_list, tag_set_list)
+ nr++;
+ return nr;
+}
+
+static inline int blk_mq_get_shared_queues(struct request_queue *q)
+{
+ int nr = 0;
+ struct blk_mq_tag_set *set = q->tag_set;
+
+ mutex_lock(&set->tag_list_lock);
+ nr = __blk_mq_get_shared_queues(q);
+ mutex_unlock(&set->tag_list_lock);
+
+ return nr;
+}
+
#endif
--
2.9.3