All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaohua Li <shli@fb.com>
To: <linux-kernel@vger.kernel.org>, <linux-block@vger.kernel.org>
Cc: <axboe@kernel.dk>, <tj@kernel.org>,
	Vivek Goyal <vgoyal@redhat.com>,
	"jmoyer @ redhat . com" <jmoyer@redhat.com>, <Kernel-team@fb.com>
Subject: [PATCH V2 01/13] block: estimate disk performance
Date: Mon, 22 Feb 2016 14:01:16 -0800	[thread overview]
Message-ID: <ce417629c09e0d93f6d323a5e83457f59397b065.1456178093.git.shli@fb.com> (raw)
In-Reply-To: <cover.1456178093.git.shli@fb.com>

weight based blk-throttling can use the estimated performance to
calculate cgroup bandwidth/IOPS. That said we never can't get accurate
disk performance in a complex workload, a roughly estimation is enough.

One issue is workload might not send enough IO to make disk 100% busy.
The calculation should compensate disk utilization. The calculation
equation is:
bps = (bytes * HZ / time) * (time / busy_time) = bytes * HZ / busy_time
iops = (ios * HZ / time) * (time / busy_time) = ios * HZ / busy_time

Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-core.c       | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-sysfs.c      | 13 ++++++++++++
 include/linux/blkdev.h |  7 +++++++
 3 files changed, 76 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index ab51685..4244f28 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1919,6 +1919,59 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 	return 0;
 }
 
+#define UPDATE_TIME (HZ / 2)
+static void blk_update_perf(struct request_queue *q,
+	struct hd_struct *p)
+{
+	unsigned long now = jiffies;
+	unsigned long last = q->bw_timestamp;
+	sector_t read_sect, write_sect, tmp_sect;
+	unsigned long read_ios, write_ios, tmp_ios;
+	unsigned long current_ticks;
+	unsigned int busy_ticks;
+
+	if (time_before(now, last + UPDATE_TIME))
+		return;
+
+	if (cmpxchg(&q->bw_timestamp, last, now) != last)
+		return;
+
+	tmp_sect = part_stat_read(p, sectors[READ]);
+	read_sect = tmp_sect - q->last_sects[READ];
+	q->last_sects[READ] = tmp_sect;
+	tmp_sect = part_stat_read(p, sectors[WRITE]);
+	write_sect = tmp_sect - q->last_sects[WRITE];
+	q->last_sects[WRITE] = tmp_sect;
+
+	tmp_ios = part_stat_read(p, ios[READ]);
+	read_ios = tmp_ios - q->last_ios[READ];
+	q->last_ios[READ] = tmp_ios;
+	tmp_ios = part_stat_read(p, ios[WRITE]);
+	write_ios = tmp_ios - q->last_ios[WRITE];
+	q->last_ios[WRITE] = tmp_ios;
+
+	current_ticks = part_stat_read(p, io_ticks);
+	busy_ticks = current_ticks - q->last_ticks;
+	q->last_ticks = current_ticks;
+
+	/* Don't account for long idle */
+	if (now - last > UPDATE_TIME * 2)
+		return;
+	/* Disk load is too low or driver doesn't account io_ticks */
+	if (busy_ticks == 0)
+		return;
+
+	if (busy_ticks > now - last)
+		busy_ticks = now - last;
+
+	tmp_sect = (read_sect + write_sect) * HZ;
+	sector_div(tmp_sect, busy_ticks);
+	q->disk_bw = tmp_sect;
+
+	tmp_ios = (read_ios + write_ios) * HZ / busy_ticks;
+	q->disk_iops = tmp_ios;
+}
+
 static noinline_for_stack bool
 generic_make_request_checks(struct bio *bio)
 {
@@ -1991,6 +2044,9 @@ generic_make_request_checks(struct bio *bio)
 	 */
 	create_io_context(GFP_ATOMIC, q->node);
 
+	blk_update_perf(q,
+		part->partno ? &part_to_disk(part)->part0 : part);
+
 	if (!blkcg_bio_issue_check(q, bio))
 		return false;
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e140cc4..b59461c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -348,6 +348,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_avg_perf_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu %llu\n",
+		       (unsigned long long)q->disk_bw * 512,
+		       (unsigned long long)q->disk_iops);
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -479,6 +486,11 @@ static struct queue_sysfs_entry queue_poll_entry = {
 	.store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_avg_perf_entry = {
+	.attr = {.name = "average_perf", .mode = S_IRUGO },
+	.show = queue_avg_perf_show,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -504,6 +516,7 @@ static struct attribute *default_attrs[] = {
 	&queue_iostats_entry.attr,
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
+	&queue_avg_perf_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 29189ae..d2d6a7b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -466,6 +466,13 @@ struct request_queue {
 	struct bio_set		*bio_split;
 
 	bool			mq_sysfs_init_done;
+
+	unsigned long bw_timestamp;
+	unsigned long last_ticks;
+	sector_t last_sects[2];
+	unsigned long last_ios[2];
+	sector_t disk_bw;
+	unsigned long disk_iops;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
2.6.5

  reply	other threads:[~2016-02-22 22:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-22 22:01 [PATCH V2 00/13] block-throttle: proportional throttle Shaohua Li
2016-02-22 22:01 ` Shaohua Li [this message]
2016-02-22 22:01 ` [PATCH V2 02/13] blk-throttle: cleanup io cost related stuff Shaohua Li
2016-02-22 22:01 ` [PATCH V2 03/13] blk-throttle: add abstract to index data Shaohua Li
2016-02-22 22:01 ` [PATCH V2 04/13] blk-throttle: weight based throttling Shaohua Li
2016-02-22 22:01 ` [PATCH V2 05/13] blk-throttling: detect inactive cgroup Shaohua Li
2016-02-22 22:01 ` [PATCH V2 06/13] blk-throttle: add per-cgroup data Shaohua Li
2016-02-22 22:01 ` [PATCH V2 07/13] blk-throttle: add interface for proporation based throttle Shaohua Li
2016-02-22 22:01 ` [PATCH V2 08/13] blk-throttle: add cgroup2 interface Shaohua Li
2016-02-22 22:01 ` [PATCH V2 09/13] blk-throttle: add trace for new proporation throttle Shaohua Li
2016-02-22 22:01 ` [PATCH V2 10/13] blk-throttle: over estimate bandwidth Shaohua Li
2016-02-22 22:01 ` [PATCH V2 11/13] blk-throttle: shrink cgroup share if its target is overestimated Shaohua Li
2016-02-22 22:01 ` [PATCH V2 12/13] blk-throttle: restore shrinked cgroup share Shaohua Li
2016-02-22 22:01 ` [PATCH V2 13/13] blk-throttle: detect wrong shrink Shaohua Li
2016-02-28 15:02 ` [PATCH V2 00/13] block-throttle: proportional throttle Pavel Machek
2016-03-01  5:19   ` Shaohua Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ce417629c09e0d93f6d323a5e83457f59397b065.1456178093.git.shli@fb.com \
    --to=shli@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=jmoyer@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.