All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shaohua Li <shli@fb.com>
To: <linux-kernel@vger.kernel.org>, <linux-block@vger.kernel.org>
Cc: <axboe@kernel.dk>, <tj@kernel.org>,
	Vivek Goyal <vgoyal@redhat.com>,
	"jmoyer @ redhat . com" <jmoyer@redhat.com>, <Kernel-team@fb.com>
Subject: [PATCH V2 07/13] blk-throttle: add interface for proportion based throttle
Date: Mon, 22 Feb 2016 14:01:22 -0800	[thread overview]
Message-ID: <cb836b65ed9fa24c9824ec3f6cb4b76c3b045eb5.1456178093.git.shli@fb.com> (raw)
In-Reply-To: <cover.1456178093.git.shli@fb.com>

There is a throttle.mode_device interface. By default blk-throttle is in
NONE mode. Setting the original bps/iops limit will change the mode to
THROTTLE automatically, so the user doesn't need to configure the mode,
which preserves backward compatibility. To use proportion based
throttling, the user must configure the device to the proper mode.
'weight_bw' is for bandwidth proportion and 'weight_iops' is for iops
proportion. Currently switching between THROTTLE mode and the proportion
modes is prohibited. This might be changed in the future.

expected usage:
set to bandwidth based proportion mode
$echo "8:0 weight_bw" > /sys/fs/cgroup/blkio/throttle.mode_device
$mkdir /sys/fs/cgroup/blkio/test
set cgroup weight for all disks
$echo "200" > /sys/fs/cgroup/blkio/test/throttle.weight
or set cgroup weight for one disk
$echo "8:0 200" > /sys/fs/cgroup/blkio/test/throttle.weight_device
$echo $$ > /sys/fs/cgroup/blkio/test/cgroup.procs

Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-throttle.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 223 insertions(+)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a0fd33e..a594000 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -86,6 +86,7 @@ struct throtl_service_queue {
 
 	unsigned int		weight; /* this queue's weight against siblings */
 	unsigned int		acting_weight; /* actual weight of the queue */
+	unsigned int		new_weight; /* weight changed to */
 	unsigned int		children_weight; /* children weight */
 	unsigned int		share; /* disk bandwidth share of the queue */
 
@@ -1529,11 +1530,16 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 		v = -1;
 
 	tg = blkg_to_tg(ctx.blkg);
+	if (td_weight_based(tg->td)) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
 
 	if (is_u64)
 		*(u64 *)((void *)tg + of_cft(of)->private) = v;
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
+	tg->td->mode = MODE_THROTTLE;
 
 	tg_conf_updated(tg);
 	ret = 0;
@@ -1554,8 +1560,217 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
 	return tg_set_conf(of, buf, nbytes, off, false);
 }
 
+static int tg_print_weight(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+	struct throtl_group_data *tgd = blkcg_to_tgd(blkcg);
+	unsigned int weight = 0;
+
+	if (tgd)
+		weight = tgd->weight;
+	seq_printf(sf, "%u\n", weight);
+	return 0;
+}
+
+static int tg_set_weight(struct cgroup_subsys_state *css,
+	struct cftype *cft, u64 val)
+{
+	struct blkcg *blkcg = css_to_blkcg(css);
+	struct throtl_group_data *tgd;
+	struct blkcg_gq *blkg;
+
+	if (val < MIN_WEIGHT)
+		val = MIN_WEIGHT;
+	if (val > MAX_WEIGHT)
+		val = MAX_WEIGHT;
+
+	spin_lock_irq(&blkcg->lock);
+	tgd = blkcg_to_tgd(blkcg);
+	if (!tgd) {
+		spin_unlock_irq(&blkcg->lock);
+		return -EINVAL;
+	}
+	tgd->weight = val;
+
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+		struct throtl_grp *tg = blkg_to_tg(blkg);
+
+		if (!tg)
+			continue;
+		/* can't hold queue->lock here, weight changing is deferred */
+		if (td_weight_based(tg->td))
+			tg->service_queue.new_weight = val;
+	}
+	spin_unlock_irq(&blkcg->lock);
+	return 0;
+}
+
+static void __tg_set_weight(struct throtl_grp *tg, unsigned int weight)
+{
+	unsigned int old_weight;
+
+	old_weight = tg->service_queue.acting_weight;
+
+	tg->service_queue.weight = weight;
+	tg->service_queue.new_weight = 0;
+	if (old_weight && tg->service_queue.parent_sq) {
+		struct throtl_service_queue *psq = tg->service_queue.parent_sq;
+		if (weight > old_weight)
+			psq->children_weight += weight - old_weight;
+		else if (weight < old_weight)
+			psq->children_weight -= old_weight - weight;
+		tg->service_queue.acting_weight = weight;
+	}
+
+	tg_update_share(tg->td, tg);
+}
+
+static ssize_t tg_set_weight_device(struct kernfs_open_file *of,
+			   char *buf, size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkg_conf_ctx ctx;
+	struct throtl_grp *tg;
+	unsigned int weight;
+	int ret;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (sscanf(ctx.body, "%u", &weight) != 1)
+		goto out_finish;
+	if (weight < MIN_WEIGHT)
+		weight = MIN_WEIGHT;
+	if (weight > MAX_WEIGHT)
+		weight = MAX_WEIGHT;
+
+	tg = blkg_to_tg(ctx.blkg);
+	if (!td_weight_based(tg->td)) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
+
+	__tg_set_weight(tg, weight);
+
+	tg_conf_updated(tg);
+	ret = 0;
+out_finish:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+
+static u64 tg_prfill_mode_device(struct seq_file *sf,
+	struct blkg_policy_data *pd, int off)
+{
+	struct throtl_grp *tg = pd_to_tg(pd);
+	const char *dname = blkg_dev_name(pd->blkg);
+
+	if (!dname)
+		return 0;
+	if (tg->td->mode == MODE_NONE)
+		return 0;
+	seq_printf(sf, "%s %s\n", dname, run_mode_name[tg->td->mode]);
+	return 0;
+}
+
+static int throtl_print_mode_device(struct seq_file *sf, void *v)
+{
+	int i;
+	seq_printf(sf, "available ");
+	for (i = 0; i < MAX_MODE; i++)
+		seq_printf(sf, "%s ", run_mode_name[i]);
+	seq_printf(sf, "\n");
+	seq_printf(sf, "default %s\n", run_mode_name[MODE_NONE]);
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+		tg_prfill_mode_device,  &blkcg_policy_throtl, 0, false);
+	return 0;
+}
+
+static u64 tg_prfill_weight_uint(struct seq_file *sf,
+	struct blkg_policy_data *pd, int off)
+{
+	struct throtl_grp *tg = pd_to_tg(pd);
+	struct throtl_group_data *tgd = blkcg_to_tgd(pd_to_blkg(pd)->blkcg);
+	unsigned int v = *(unsigned int *)((void *)tg + off);
+
+	if (v == tgd->weight)
+		return 0;
+	return __blkg_prfill_u64(sf, pd, v);
+}
+
+static int tg_print_weight_device(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_weight_uint,
+			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static ssize_t tg_set_mode_device(struct kernfs_open_file *of,
+				  char *buf, size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkg_conf_ctx ctx;
+	struct throtl_grp *tg;
+	int ret;
+	char mode_name[20] = "";
+	int mode;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (sscanf(ctx.body, "%s", mode_name) != 1)
+		goto out_finish;
+
+	for (mode = 0; mode < MAX_MODE; mode++)
+		if (!strcmp(mode_name, run_mode_name[mode]))
+			break;
+	if (mode == MAX_MODE)
+		goto out_finish;
+
+	tg = blkg_to_tg(ctx.blkg);
+	if (tg->td->mode == mode) {
+		ret = 0;
+		goto out_finish;
+	}
+	/* Don't allow switching between throttle and weight based currently */
+	if (tg->td->mode != MODE_NONE) {
+		ret = -EBUSY;
+		goto out_finish;
+	}
+
+	tg->td->mode = mode;
+
+	ret = 0;
+out_finish:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+
 static struct cftype throtl_legacy_files[] = {
 	{
+		.name = "throttle.mode_device",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.seq_show = throtl_print_mode_device,
+		.write = tg_set_mode_device,
+	},
+	{
+		.name = "throttle.weight",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = tg_print_weight,
+		.write_u64 = tg_set_weight,
+	},
+	{
+		.name = "throttle.weight_device",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.private = offsetof(struct throtl_grp, service_queue.weight),
+		.seq_show = tg_print_weight_device,
+		.write = tg_set_weight_device,
+	},
+	{
 		.name = "throttle.read_bps_device",
 		.private = offsetof(struct throtl_grp, io_cost.bps[READ]),
 		.seq_show = tg_print_conf_u64,
@@ -1728,6 +1943,13 @@ static struct blkcg_policy blkcg_policy_throtl = {
 	.pd_free_fn		= throtl_pd_free,
 };
 
+static void tg_check_new_weight(struct throtl_grp *tg)
+{
+	if (!td_weight_based(tg->td) || !tg->service_queue.new_weight)
+		return;
+	__tg_set_weight(tg, tg->service_queue.new_weight);
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
@@ -1751,6 +1973,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	sq = &tg->service_queue;
 
+	tg_check_new_weight(tg);
 	detect_inactive_cg(tg);
 	while (true) {
 		/* throtl is FIFO - if bios are already queued, should queue */
-- 
2.6.5

  parent reply	other threads:[~2016-02-22 22:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-22 22:01 [PATCH V2 00/13] block-throttle: proportional throttle Shaohua Li
2016-02-22 22:01 ` [PATCH V2 01/13] block: estimate disk performance Shaohua Li
2016-02-22 22:01 ` [PATCH V2 02/13] blk-throttle: cleanup io cost related stuff Shaohua Li
2016-02-22 22:01 ` [PATCH V2 03/13] blk-throttle: add abstract to index data Shaohua Li
2016-02-22 22:01 ` [PATCH V2 04/13] blk-throttle: weight based throttling Shaohua Li
2016-02-22 22:01 ` [PATCH V2 05/13] blk-throttling: detect inactive cgroup Shaohua Li
2016-02-22 22:01 ` [PATCH V2 06/13] blk-throttle: add per-cgroup data Shaohua Li
2016-02-22 22:01 ` Shaohua Li [this message]
2016-02-22 22:01 ` [PATCH V2 08/13] blk-throttle: add cgroup2 interface Shaohua Li
2016-02-22 22:01 ` [PATCH V2 09/13] blk-throttle: add trace for new proporation throttle Shaohua Li
2016-02-22 22:01 ` [PATCH V2 10/13] blk-throttle: over estimate bandwidth Shaohua Li
2016-02-22 22:01 ` [PATCH V2 11/13] blk-throttle: shrink cgroup share if its target is overestimated Shaohua Li
2016-02-22 22:01 ` [PATCH V2 12/13] blk-throttle: restore shrinked cgroup share Shaohua Li
2016-02-22 22:01 ` [PATCH V2 13/13] blk-throttle: detect wrong shrink Shaohua Li
2016-02-28 15:02 ` [PATCH V2 00/13] block-throttle: proportional throttle Pavel Machek
2016-03-01  5:19   ` Shaohua Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cb836b65ed9fa24c9824ec3f6cb4b76c3b045eb5.1456178093.git.shli@fb.com \
    --to=shli@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=axboe@kernel.dk \
    --cc=jmoyer@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.