Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk, newella@fb.com, clm@fb.com,
	josef@toxicpanda.com, dennisz@fb.com, lizefan@huawei.com,
	hannes@cmpxchg.org
Cc: linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
	kernel-team@fb.com, cgroups@vger.kernel.org,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 07/10] blk-mq: add optional request->alloc_time_ns
Date: Wed, 28 Aug 2019 15:05:57 -0700
Message-ID: <20190828220600.2527417-8-tj@kernel.org> (raw)
In-Reply-To: <20190828220600.2527417-1-tj@kernel.org>

There are currently two start time timestamps - start_time_ns and
io_start_time_ns.  The former marks the request allocation and the
latter the issue-to-device time.  The planned io.weight controller needs
to measure the total time bios take to execute after they leave rq_qos,
including the time spent waiting for a request to become available,
which can easily dominate on saturated devices.

This patch adds request->alloc_time_ns which records when the request
allocation attempt started.  As it isn't used for the usual stats,
make it optional behind CONFIG_BLK_RQ_ALLOC_TIME and
QUEUE_FLAG_RQ_ALLOC_TIME so that it can be compiled out when there are
no users and it's active only on queues which need it even when
compiled in.

v2: s/pre_start_time/alloc_time/ and add CONFIG_BLK_RQ_ALLOC_TIME
    gating as suggested by Jens.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/Kconfig          |  3 +++
 block/blk-mq.c         | 13 +++++++++++--
 include/linux/blkdev.h | 13 ++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index 8b5f8e560eb4..1b62ad6d0e12 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,9 @@ menuconfig BLOCK
 
 if BLOCK
 
+config BLK_RQ_ALLOC_TIME
+	bool
+
 config BLK_SCSI_REQUEST
 	bool
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f6620a30752e..b622029b19ea 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -291,7 +291,7 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
 }
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
-		unsigned int tag, unsigned int op)
+		unsigned int tag, unsigned int op, u64 alloc_time_ns)
 {
 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct request *rq = tags->static_rqs[tag];
@@ -325,6 +325,9 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	rq->alloc_time_ns = alloc_time_ns;
+#endif
 	if (blk_mq_need_time_stamp(rq))
 		rq->start_time_ns = ktime_get_ns();
 	else
@@ -356,8 +359,14 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	struct request *rq;
 	unsigned int tag;
 	bool clear_ctx_on_error = false;
+	u64 alloc_time_ns = 0;
 
 	blk_queue_enter_live(q);
+
+	/* alloc_time includes depth and tag waits */
+	if (blk_queue_rq_alloc_time(q))
+		alloc_time_ns = ktime_get_ns();
+
 	data->q = q;
 	if (likely(!data->ctx)) {
 		data->ctx = blk_mq_get_ctx(q);
@@ -393,7 +402,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 		return NULL;
 	}
 
-	rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags);
+	rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
 	if (!op_is_flush(data->cmd_flags)) {
 		rq->elv.icq = NULL;
 		if (e && e->type->ops.prepare_request) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ac790178787..d0ad21e4771b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -194,7 +194,11 @@ struct request {
 
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
-	/* Time that I/O was submitted to the kernel. */
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	/* Time that the first bio started allocating this request. */
+	u64 alloc_time_ns;
+#endif
+	/* Time that this request was allocated for this IO. */
 	u64 start_time_ns;
 	/* Time that I/O was submitted to the device. */
 	u64 io_start_time_ns;
@@ -609,6 +613,7 @@ struct request_queue {
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 #define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
+#define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP))
@@ -637,6 +642,12 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 #define blk_queue_pci_p2pdma(q)	\
 	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+#define blk_queue_rq_alloc_time(q)	\
+	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
+#else
+#define blk_queue_rq_alloc_time(q)	false
+#endif
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
-- 
2.17.1


  parent reply index

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-28 22:05 [PATCHSET v3 block/for-linus] IO cost model based work-conserving porportional controller Tejun Heo
2019-08-28 22:05 ` [PATCH 01/10] blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn() Tejun Heo
2019-08-28 22:05 ` [PATCH 02/10] blkcg: make ->cpd_init_fn() optional Tejun Heo
2019-08-28 22:05 ` [PATCH 03/10] blkcg: separate blkcg_conf_get_disk() out of blkg_conf_prep() Tejun Heo
2019-08-28 22:05 ` [PATCH 04/10] block/rq_qos: add rq_qos_merge() Tejun Heo
2019-08-28 22:05 ` [PATCH 05/10] block/rq_qos: implement rq_qos_ops->queue_depth_changed() Tejun Heo
2019-08-28 22:05 ` [PATCH 06/10] blkcg: s/RQ_QOS_CGROUP/RQ_QOS_LATENCY/ Tejun Heo
2019-08-28 22:05 ` Tejun Heo [this message]
2019-08-28 22:05 ` [PATCH 08/10] blkcg: implement blk-iocost Tejun Heo
2019-08-29 15:53   ` [PATCH] blkcg: fix missing free on error path of blk_iocost_init() Tejun Heo
2019-09-10 12:55   ` [PATCH 08/10] blkcg: implement blk-iocost Michal Koutný
2019-09-10 16:08     ` Tejun Heo
2019-09-11  8:18       ` Paolo Valente
2019-09-11 14:16         ` Tejun Heo
2019-09-11 15:54           ` Tejun Heo
2019-09-11 16:44           ` Paolo Valente
2019-10-03 14:51       ` Michal Koutný
2019-10-03 16:45         ` Tejun Heo
2019-10-09 15:36           ` Michal Koutný
2019-10-14 15:36             ` Tejun Heo
2019-11-01 16:15               ` Michal Koutný
2019-11-01 16:56                 ` Paolo Valente
2019-08-28 22:05 ` [PATCH 09/10] blkcg: add tools/cgroup/iocost_monitor.py Tejun Heo
2019-08-28 22:06 ` [PATCH 10/10] blkcg: add tools/cgroup/iocost_coef_gen.py Tejun Heo
2019-08-29  3:29 ` [PATCHSET v3 block/for-linus] IO cost model based work-conserving porportional controller Jens Axboe
     [not found] ` <20190829082248.6464-1-hdanton@sina.com>
2019-08-29 15:43   ` [PATCH 07/10] blk-mq: add optional request->alloc_time_ns Tejun Heo
     [not found] ` <20190829133928.16192-1-hdanton@sina.com>
2019-08-29 15:46   ` [PATCH 08/10] blkcg: implement blk-iocost Tejun Heo
2019-08-29 15:54 ` [PATCHSET v3 block/for-linus] IO cost model based work-conserving porportional controller Paolo Valente
2019-08-29 15:56   ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190828220600.2527417-8-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=clm@fb.com \
    --cc=dennisz@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=newella@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git