All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaohua Li <shli@fb.com>
To: <linux-kernel@vger.kernel.org>, <linux-block@vger.kernel.org>
Cc: <axboe@kernel.dk>, <tj@kernel.org>,
	Vivek Goyal <vgoyal@redhat.com>, <jmoyer@redhat.com>,
	<Kernel-team@fb.com>
Subject: [PATCH V7 16/18] blk-throttle: add interface for per-cgroup target latency
Date: Mon, 27 Mar 2017 10:51:44 -0700	[thread overview]
Message-ID: <2b83be1e6eb0fc1640ed094752ef218813f50935.1490634565.git.shli@fb.com> (raw)
In-Reply-To: <cover.1490634565.git.shli@fb.com>

Here we introduce a per-cgroup latency target. The target determines how
much latency increase a cgroup can afford. We will use the target latency
to calculate a threshold and use it to schedule IO for cgroups. If a
cgroup's bandwidth is below its low limit but its average latency is
below the threshold, other cgroups can safely dispatch more IO even if
their bandwidth is higher than their low limits. On the other hand, if
the first cgroup's latency is higher than the threshold, other cgroups
are throttled to their low limits. So the target latency determines how
efficiently we can use free disk resources without sacrificing the
workload's IO latency.

For example, assume 4k IO average latency is 50us when disk isn't
congested. A cgroup sets the target latency to 30us. Then the cgroup can
accept 50+30=80us IO latency. If the cgroup's average IO latency is
90us and its bandwidth is below low limit, other cgroups are throttled
to their low limit. If the cgroup's average IO latency is 60us, other
cgroups are allowed to dispatch more IO. When other cgroups dispatch
more IO, the first cgroup's IO latency will increase. If it increases to
81us, we then throttle other cgroups.

Users configure the interface like this:
echo "8:16 rbps=2097152 wbps=max latency=100 idle=200" > io.low

The latency value is specified in microseconds.

By default, the latency target is 0, which means no latency increase is
tolerated, i.e. IO latency is guaranteed.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-throttle.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0ea8698..6e1c298 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -25,6 +25,8 @@ static int throtl_quantum = 32;
 #define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
 #define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
+/* default latency target is 0, eg, guarantee IO latency by default */
+#define DFL_LATENCY_TARGET (0)
 
 static struct blkcg_policy blkcg_policy_throtl;
 
@@ -152,6 +154,7 @@ struct throtl_grp {
 
 	unsigned long last_check_time;
 
+	unsigned long latency_target; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
@@ -449,6 +452,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
 	/* LIMIT_LOW will have default value 0 */
 
+	tg->latency_target = DFL_LATENCY_TARGET;
+
 	return &tg->pd;
 }
 
@@ -1445,6 +1450,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	u64 bps_dft;
 	unsigned int iops_dft;
 	char idle_time[26] = "";
+	char latency_time[26] = "";
 
 	if (!dname)
 		return 0;
@@ -1461,8 +1467,9 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->bps_conf[WRITE][off] == bps_dft &&
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
-	    (off != LIMIT_LOW || tg->idletime_threshold ==
-				  tg->td->dft_idletime_threshold))
+	    (off != LIMIT_LOW ||
+	     (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
+	      tg->latency_target == DFL_LATENCY_TARGET)))
 		return 0;
 
 	if (tg->bps_conf[READ][off] != bps_dft)
@@ -1483,10 +1490,17 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 		else
 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
 				tg->idletime_threshold);
+
+		if (tg->latency_target == ULONG_MAX)
+			strcpy(latency_time, " latency=max");
+		else
+			snprintf(latency_time, sizeof(latency_time),
+				" latency=%lu", tg->latency_target);
 	}
 
-	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s\n",
-		   dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time);
+	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
+		   dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
+		   latency_time);
 	return 0;
 }
 
@@ -1505,6 +1519,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	struct throtl_grp *tg;
 	u64 v[4];
 	unsigned long idle_time;
+	unsigned long latency_time;
 	int ret;
 	int index = of_cft(of)->private;
 
@@ -1520,6 +1535,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[3] = tg->iops_conf[WRITE][index];
 
 	idle_time = tg->idletime_threshold;
+	latency_time = tg->latency_target;
 	while (true) {
 		char tok[27];	/* wiops=18446744073709551616 */
 		char *p;
@@ -1553,6 +1569,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 			v[3] = min_t(u64, val, UINT_MAX);
 		else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
 			idle_time = val;
+		else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
+			latency_time = val;
 		else
 			goto out_finish;
 	}
@@ -1583,6 +1601,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 			tg->td->limit_index = LIMIT_LOW;
 		tg->idletime_threshold = (idle_time == ULONG_MAX) ?
 			ULONG_MAX : idle_time;
+		tg->latency_target = (latency_time == ULONG_MAX) ?
+			ULONG_MAX : latency_time;
 	}
 	tg_conf_updated(tg);
 	ret = 0;
-- 
2.9.3

  parent reply	other threads:[~2017-03-27 17:51 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-27 17:51 [PATCH V7 00/18] blk-throttle: add .low limit Shaohua Li
2017-03-27 17:51 ` [PATCH V7 01/18] blk-throttle: use U64_MAX/UINT_MAX to replace -1 Shaohua Li
2017-03-27 17:51 ` [PATCH V7 02/18] blk-throttle: prepare support multiple limits Shaohua Li
2017-03-27 17:51 ` [PATCH V7 03/18] blk-throttle: add configure option for new .low interface Shaohua Li
2017-03-27 17:51 ` [PATCH V7 04/18] blk-throttle: add " Shaohua Li
2017-03-27 17:51 ` [PATCH V7 05/18] blk-throttle: configure bps/iops limit for cgroup in low limit Shaohua Li
2017-03-27 17:51 ` [PATCH V7 06/18] blk-throttle: add upgrade logic for LIMIT_LOW state Shaohua Li
2017-03-27 17:51 ` [PATCH V7 07/18] blk-throttle: add downgrade logic Shaohua Li
2017-03-27 17:51 ` [PATCH V7 08/18] blk-throttle: make sure expire time isn't too big Shaohua Li
2017-03-27 17:51 ` [PATCH V7 09/18] blk-throttle: make throtl_slice tunable Shaohua Li
2017-03-27 17:51 ` [PATCH V7 10/18] blk-throttle: choose a small throtl_slice for SSD Shaohua Li
2017-03-27 17:51 ` [PATCH V7 11/18] blk-throttle: detect completed idle cgroup Shaohua Li
2017-03-27 17:51 ` [PATCH V7 12/18] blk-throttle: make bandwidth change smooth Shaohua Li
2017-03-27 17:51 ` [PATCH V7 13/18] blk-throttle: add a simple idle detection Shaohua Li
2017-03-27 17:51 ` [PATCH V7 14/18] blk-throttle: add interface to configure idle time threshold Shaohua Li
2017-03-27 17:51 ` [PATCH V7 15/18] blk-throttle: ignore idle cgroup limit Shaohua Li
2017-03-27 17:51 ` Shaohua Li [this message]
2017-03-27 17:51 ` [PATCH V7 17/18] blk-throttle: add a mechanism to estimate IO latency Shaohua Li
2017-03-27 17:51 ` [PATCH V7 18/18] blk-throttle: add latency target support Shaohua Li
2017-03-27 18:15 ` [PATCH V7 00/18] blk-throttle: add .low limit Jens Axboe
2017-03-27 19:00   ` Shaohua Li
2017-03-27 19:11     ` Jens Axboe
2017-03-27 22:19       ` [PATCH 0/3] blk-throttle: add .low limit fix Shaohua Li
2017-03-27 22:19         ` [PATCH 1/3] block: track request size in blk_issue_stat Shaohua Li
2017-03-27 22:19         ` [PATCH 2/3] blk-throttle: add a mechanism to estimate IO latency Shaohua Li
2017-03-27 22:19         ` [PATCH 3/3] blk-throttle: add latency target support Shaohua Li
2017-03-28 15:58         ` [PATCH 0/3] blk-throttle: add .low limit fix Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2b83be1e6eb0fc1640ed094752ef218813f50935.1490634565.git.shli@fb.com \
    --to=shli@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=jmoyer@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.