All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huaixin Chang <changhuaixin@linux.alibaba.com>
To: linux-kernel@vger.kernel.org
Cc: bsegall@google.com, changhuaixin@linux.alibaba.com,
	dietmar.eggemann@arm.com, juri.lelli@redhat.com, mgorman@suse.de,
	mingo@redhat.com, pauld@redhead.com, peterz@infradead.org,
	pjt@google.com, rostedt@goodmis.org, vincent.guittot@linaro.org,
	khlebnikov@yandex-team.ru, xiyou.wangcong@gmail.com,
	shanpeic@linux.alibaba.com
Subject: [PATCH 1/4] sched/fair: Introduce primitives for CFS bandwidth burst
Date: Thu, 17 Dec 2020 15:46:17 +0800	[thread overview]
Message-ID: <20201217074620.58338-2-changhuaixin@linux.alibaba.com> (raw)
In-Reply-To: <20201217074620.58338-1-changhuaixin@linux.alibaba.com>

In this patch, we introduce the notion of CFS bandwidth burst. Unused
"quota" from pervious "periods" might be accumulated and used in the
following "periods". The maximum amount of accumulated bandwidth is
bounded by "burst". And the maximun amount of CPU a group can consume in
a given period is "buffer" which is equivalent to "quota" + "burst in
case that this group has done enough accumulation.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shanpei Chen <shanpeic@linux.alibaba.com>
---
 kernel/sched/core.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++--------
 kernel/sched/fair.c  |  2 ++
 kernel/sched/sched.h |  2 ++
 3 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e7e453492cff..48d3bad12be2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7895,10 +7895,12 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
-static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
+							u64 burst)
 {
 	int i, ret = 0, runtime_enabled, runtime_was_enabled;
 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+	u64 buffer;
 
 	if (tg == &root_task_group)
 		return -EINVAL;
@@ -7925,6 +7927,16 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
 		return -EINVAL;
 
+	/*
+	 * Bound burst to defend burst against overflow during bandwidth shift.
+	 */
+	if (burst > max_cfs_runtime)
+		return -EINVAL;
+
+	if (quota == RUNTIME_INF)
+		buffer = RUNTIME_INF;
+	else
+		buffer = min(max_cfs_runtime, quota + burst);
 	/*
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
 	 * unthrottle_offline_cfs_rqs().
@@ -7946,6 +7958,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
+	cfs_b->burst = burst;
+	cfs_b->buffer = buffer;
 
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
@@ -7979,9 +7993,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 
 static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 {
-	u64 quota, period;
+	u64 quota, period, burst;
 
 	period = ktime_to_ns(tg->cfs_bandwidth.period);
+	burst = tg->cfs_bandwidth.burst;
 	if (cfs_quota_us < 0)
 		quota = RUNTIME_INF;
 	else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
@@ -7989,7 +8004,7 @@ static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 	else
 		return -EINVAL;
 
-	return tg_set_cfs_bandwidth(tg, period, quota);
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
 }
 
 static long tg_get_cfs_quota(struct task_group *tg)
@@ -8007,15 +8022,16 @@ static long tg_get_cfs_quota(struct task_group *tg)
 
 static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
 {
-	u64 quota, period;
+	u64 quota, period, burst;
 
 	if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
 		return -EINVAL;
 
 	period = (u64)cfs_period_us * NSEC_PER_USEC;
 	quota = tg->cfs_bandwidth.quota;
+	burst = tg->cfs_bandwidth.burst;
 
-	return tg_set_cfs_bandwidth(tg, period, quota);
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
 }
 
 static long tg_get_cfs_period(struct task_group *tg)
@@ -8028,6 +8044,35 @@ static long tg_get_cfs_period(struct task_group *tg)
 	return cfs_period_us;
 }
 
+static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us)
+{
+	u64 quota, period, burst;
+
+	period = ktime_to_ns(tg->cfs_bandwidth.period);
+	quota = tg->cfs_bandwidth.quota;
+	if (cfs_burst_us < 0)
+		burst = RUNTIME_INF;
+	else if ((u64)cfs_burst_us <= U64_MAX / NSEC_PER_USEC)
+		burst = (u64)cfs_burst_us * NSEC_PER_USEC;
+	else
+		return -EINVAL;
+
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
+}
+
+static long tg_get_cfs_burst(struct task_group *tg)
+{
+	u64 burst_us;
+
+	if (tg->cfs_bandwidth.burst == RUNTIME_INF)
+		return -1;
+
+	burst_us = tg->cfs_bandwidth.burst;
+	do_div(burst_us, NSEC_PER_USEC);
+
+	return burst_us;
+}
+
 static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
 				  struct cftype *cft)
 {
@@ -8052,6 +8097,18 @@ static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
 	return tg_set_cfs_period(css_tg(css), cfs_period_us);
 }
 
+static s64 cpu_cfs_burst_read_s64(struct cgroup_subsys_state *css,
+				  struct cftype *cft)
+{
+	return tg_get_cfs_burst(css_tg(css));
+}
+
+static int cpu_cfs_burst_write_s64(struct cgroup_subsys_state *css,
+				   struct cftype *cftype, s64 cfs_burst_us)
+{
+	return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+}
+
 struct cfs_schedulable_data {
 	struct task_group *tg;
 	u64 period, quota;
@@ -8204,6 +8261,11 @@ static struct cftype cpu_legacy_files[] = {
 		.read_u64 = cpu_cfs_period_read_u64,
 		.write_u64 = cpu_cfs_period_write_u64,
 	},
+	{
+		.name = "cfs_burst_us",
+		.read_s64 = cpu_cfs_burst_read_s64,
+		.write_s64 = cpu_cfs_burst_write_s64,
+	},
 	{
 		.name = "stat",
 		.seq_show = cpu_cfs_stat_show,
@@ -8324,26 +8386,27 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 #endif
 
 static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
-						  long period, long quota)
+					long period, long quota, long burst)
 {
 	if (quota < 0)
 		seq_puts(sf, "max");
 	else
 		seq_printf(sf, "%ld", quota);
 
-	seq_printf(sf, " %ld\n", period);
+	seq_printf(sf, " %ld %ld\n", period, burst);
 }
 
-/* caller should put the current value in *@periodp before calling */
+/* caller should put the current value in *@periodp and *@burstp before calling */
 static int __maybe_unused cpu_period_quota_parse(char *buf,
-						 u64 *periodp, u64 *quotap)
+					 u64 *periodp, u64 *quotap, u64 *burstp)
 {
 	char tok[21];	/* U64_MAX */
 
-	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
+	if (sscanf(buf, "%20s %llu %llu", tok, periodp, burstp) < 1)
 		return -EINVAL;
 
 	*periodp *= NSEC_PER_USEC;
+	*burstp *= NSEC_PER_USEC;
 
 	if (sscanf(tok, "%llu", quotap))
 		*quotap *= NSEC_PER_USEC;
@@ -8360,7 +8423,8 @@ static int cpu_max_show(struct seq_file *sf, void *v)
 {
 	struct task_group *tg = css_tg(seq_css(sf));
 
-	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+	cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg),
+			tg_get_cfs_burst(tg));
 	return 0;
 }
 
@@ -8369,12 +8433,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 {
 	struct task_group *tg = css_tg(of_css(of));
 	u64 period = tg_get_cfs_period(tg);
+	u64 burst = tg_get_cfs_burst(tg);
 	u64 quota;
 	int ret;
 
-	ret = cpu_period_quota_parse(buf, &period, &quota);
+	ret = cpu_period_quota_parse(buf, &period, &quota, &burst);
 	if (!ret)
-		ret = tg_set_cfs_bandwidth(tg, period, quota);
+		ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
 	return ret ?: nbytes;
 }
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ae7ceba8fd4f..6bb4f89259fd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5242,6 +5242,8 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
+	cfs_b->burst = 0;
+	cfs_b->buffer = RUNTIME_INF;
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df80bfcea92e..a8772eca8cbb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -364,6 +364,8 @@ struct cfs_bandwidth {
 	ktime_t			period;
 	u64			quota;
 	u64			runtime;
+	u64			burst;
+	u64			buffer;
 	s64			hierarchical_quota;
 
 	u8			idle;
-- 
2.14.4.44.g2045bb6


  reply	other threads:[~2020-12-17  7:48 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-17  7:46 [PATCH 0/4] sched/fair: Burstable CFS bandwidth controller Huaixin Chang
2020-12-17  7:46 ` Huaixin Chang [this message]
2020-12-17 13:36   ` [PATCH 1/4] sched/fair: Introduce primitives for CFS bandwidth burst Peter Zijlstra
2020-12-21 13:53     ` changhuaixin
2021-01-12  9:21       ` changhuaixin
2020-12-17  7:46 ` [PATCH 2/4] sched/fair: Make CFS bandwidth controller burstable Huaixin Chang
2020-12-18  9:53   ` kernel test robot
2020-12-18  9:53     ` kernel test robot
2020-12-17  7:46 ` [PATCH 3/4] sched/fair: Add cfs bandwidth burst statistics Huaixin Chang
2020-12-17  7:46 ` [PATCH 4/4] sched/fair: Add document for burstable CFS bandwidth control Huaixin Chang
2020-12-17 21:25 ` [PATCH 0/4] sched/fair: Burstable CFS bandwidth controller Benjamin Segall
2021-01-20 12:27 ` [PATCH v2 " Huaixin Chang
2021-01-20 12:27   ` [PATCH 1/4] sched/fair: Introduce primitives for CFS bandwidth burst Huaixin Chang
2021-01-20 12:27   ` [PATCH 2/4] sched/fair: Make CFS bandwidth controller burstable Huaixin Chang
2021-01-20 17:06     ` kernel test robot
2021-01-20 17:06       ` kernel test robot
2021-01-20 18:33     ` kernel test robot
2021-01-20 18:33       ` kernel test robot
2021-01-20 22:01     ` kernel test robot
2021-01-20 22:01       ` kernel test robot
2021-01-20 22:01     ` [RFC PATCH] sched/fair: __refill_cfs_bandwidth_runtime() can be static kernel test robot
2021-01-20 22:01       ` kernel test robot
2021-01-20 12:27   ` [PATCH 3/4] sched/fair: Add cfs bandwidth burst statistics Huaixin Chang
2021-01-20 12:27   ` [PATCH 4/4] sched/fair: Add document for burstable CFS bandwidth control Huaixin Chang
2021-01-20 19:48     ` Randy Dunlap
2021-01-21 11:04 ` [PATCH v3 0/4] sched/fair: Burstable CFS bandwidth controller Huaixin Chang
2021-01-21 11:04   ` [PATCH v3 1/4] sched/fair: Introduce primitives for CFS bandwidth burst Huaixin Chang
2021-03-10 12:39     ` Peter Zijlstra
2021-01-21 11:04   ` [PATCH v3 2/4] sched/fair: Make CFS bandwidth controller burstable Huaixin Chang
2021-03-10 13:04     ` Peter Zijlstra
2021-03-12 13:54       ` changhuaixin
2021-03-16 10:55         ` Peter Zijlstra
2021-01-21 11:04   ` [PATCH v3 3/4] sched/fair: Add cfs bandwidth burst statistics Huaixin Chang
2021-03-10 13:10     ` Peter Zijlstra
2021-01-21 11:04   ` [PATCH v3 4/4] sched/fair: Add document for burstable CFS bandwidth control Huaixin Chang
2021-03-10 13:17     ` Peter Zijlstra
2021-01-26 10:18   ` [PATCH v3 0/4] sched/fair: Burstable CFS bandwidth controller changhuaixin
2021-03-10 12:26     ` Peter Zijlstra
2021-02-09 13:17   ` Odin Ugedal
2021-02-09 10:28     ` Tejun Heo
2021-02-27 13:48     ` changhuaixin
2021-03-10 11:11       ` Odin Ugedal
2021-03-12 13:26         ` changhuaixin
2021-02-02 11:40 [PATCH " Huaixin Chang
2021-02-02 11:40 ` [PATCH 1/4] sched/fair: Introduce primitives for CFS bandwidth burst Huaixin Chang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201217074620.58338-2-changhuaixin@linux.alibaba.com \
    --to=changhuaixin@linux.alibaba.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=khlebnikov@yandex-team.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pauld@redhead.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rostedt@goodmis.org \
    --cc=shanpeic@linux.alibaba.com \
    --cc=vincent.guittot@linaro.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.