linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Huaixin Chang <changhuaixin@linux.alibaba.com>
Cc: bsegall@google.com, dietmar.eggemann@arm.com,
	juri.lelli@redhat.com, khlebnikov@yandex-team.ru,
	linux-kernel@vger.kernel.org, mgorman@suse.de, mingo@redhat.com,
	odin@uged.al, odin@ugedal.com, pauld@redhead.com, pjt@google.com,
	rostedt@goodmis.org, shanpeic@linux.alibaba.com, tj@kernel.org,
	vincent.guittot@linaro.org, xiyou.wangcong@gmail.com
Subject: Re: [PATCH v4 1/4] sched/fair: Introduce primitives for CFS bandwidth burst
Date: Tue, 16 Mar 2021 11:40:35 +0100	[thread overview]
Message-ID: <YFCLI79Tm0i8uaxH@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20210316044931.39733-2-changhuaixin@linux.alibaba.com>

On Tue, Mar 16, 2021 at 12:49:28PM +0800, Huaixin Chang wrote:
> And the maximun amount of CPU a group can consume in
> a given period is "buffer" which is equivalent to "quota" + "burst in
> case that this group has done enough accumulation.

I'm confused as heck about cfs_b->buffer. Why do you need that? What's
wrong with cfs_b->runtime ?

Currently, by being strict, we ignore any remaining runtime and the
period timer resets runtime to quota and life goes on. What you want to
do is instead of resetting runtime, add quota and limit the total.

That is, currently it does:

	runtime = min(runtime + quota, quota);

which by virtue of runtime not being allowed negative, it the exact same
as:

	runtime = quota;

Which is what we have in refill.

Fix that to be something like:

	runtime = min(runtime + quota, quota + burst)

and you're basically done. And that seems *much* simpler.

What am I missing, why have you made it so complicated?


/me looks again..

Oooh, I think I see, all this is because you don't do your constraints
right. Removing static from max_cfs_runtime is a big clue you did that
wrong.

Something like this on top of the first two. Much simpler!

Now I just need to figure out wth you mucked about with where we call
refill.

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8954,7 +8954,7 @@ static DEFINE_MUTEX(cfs_constraints_mute
 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
 static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
 /* More than 203 days if BW_SHIFT equals 20. */
-const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
+static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
@@ -8989,10 +8989,10 @@ static int tg_set_cfs_bandwidth(struct t
 	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
 		return -EINVAL;
 
-	/*
-	 * Bound burst to defend burst against overflow during bandwidth shift.
-	 */
-	if (burst > max_cfs_runtime)
+	if (burst > quota)
+		return -EINVAL;
+
+	if (quota + burst > max_cfs_runtime)
 		return -EINVAL;
 
 	/*
@@ -9019,8 +9019,6 @@ static int tg_set_cfs_bandwidth(struct t
 	cfs_b->burst = burst;
 
 	if (runtime_enabled) {
-		cfs_b->buffer = min(max_cfs_runtime, quota + burst);
-		cfs_b->max_overrun = DIV_ROUND_UP_ULL(max_cfs_runtime, quota);
 		cfs_b->runtime = cfs_b->quota;
 
 		/* Restart the period timer (if active) to handle new period expiry: */
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4621,23 +4621,22 @@ static inline u64 sched_cfs_bandwidth_sl
  *
  * requires cfs_b->lock
  */
-static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b,
-					   u64 overrun)
+static void
+__refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b, u64 overrun)
 {
-	u64 refill;
-
-	if (cfs_b->quota != RUNTIME_INF) {
-
-		if (!sysctl_sched_cfs_bw_burst_enabled) {
-			cfs_b->runtime = cfs_b->quota;
-			return;
-		}
+	if (unlikely(cfs_b->quota == RUNTIME_INF))
+		return;
 
-		overrun = min(overrun, cfs_b->max_overrun);
-		refill = cfs_b->quota * overrun;
-		cfs_b->runtime += refill;
-		cfs_b->runtime = min(cfs_b->runtime, cfs_b->buffer);
+	if (!sysctl_sched_cfs_bw_burst_enabled) {
+		cfs_b->runtime = cfs_b->quota;
+		return;
 	}
+
+	/*
+	 * Ignore @overrun since burst <= quota.
+	 */
+	cfs_b->runtime += cfs_b->quota;
+	cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -5226,7 +5225,6 @@ static enum hrtimer_restart sched_cfs_sl
 }
 
 extern const u64 max_cfs_quota_period;
-extern const u64 max_cfs_runtime;
 
 static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
@@ -5256,18 +5254,7 @@ static enum hrtimer_restart sched_cfs_pe
 			new = old * 2;
 			if (new < max_cfs_quota_period) {
 				cfs_b->period = ns_to_ktime(new);
-				cfs_b->quota = min(cfs_b->quota * 2,
-						   max_cfs_runtime);
-
-				cfs_b->buffer = min(cfs_b->quota + cfs_b->burst,
-						    max_cfs_runtime);
-				/*
-				 * Add 1 in case max_overrun becomes 0.
-				 * 0 max_overrun will cause no runtime being
-				 * refilled in __refill_cfs_bandwidth_runtime().
-				 */
-				cfs_b->max_overrun >>= 1;
-				cfs_b->max_overrun++;
+				cfs_b->quota *= 2;
 
 				pr_warn_ratelimited(
 	"cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
@@ -5300,7 +5287,6 @@ void init_cfs_bandwidth(struct cfs_bandw
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 	cfs_b->burst = 0;
-	cfs_b->buffer = RUNTIME_INF;
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,8 +366,6 @@ struct cfs_bandwidth {
 	u64			quota;
 	u64			runtime;
 	u64			burst;
-	u64			buffer;
-	u64			max_overrun;
 	s64			hierarchical_quota;
 
 	u8			idle;

  parent reply	other threads:[~2021-03-16 10:42 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-16  4:49 [PATCH v4 0/4] sched/fair: Burstable CFS bandwidth controller Huaixin Chang
2021-03-16  4:49 ` [PATCH v4 1/4] sched/fair: Introduce primitives for CFS bandwidth burst Huaixin Chang
2021-03-16  9:27   ` Peter Zijlstra
2021-03-16  9:41   ` Peter Zijlstra
2021-03-16  9:54   ` Peter Zijlstra
2021-03-17  7:16     ` changhuaixin
2021-03-17  8:06       ` Peter Zijlstra
2021-03-18  1:26         ` changhuaixin
2021-03-18 12:59           ` Phil Auld
2021-03-18 15:10             ` Peter Zijlstra
2021-04-19  8:18               ` changhuaixin
2021-03-19 12:51             ` changhuaixin
2021-03-18 15:05           ` Peter Zijlstra
2021-03-19 12:39             ` changhuaixin
2021-03-20  2:06               ` changhuaixin
2021-05-12 12:41             ` changhuaixin
2021-03-16 10:40   ` Peter Zijlstra [this message]
2021-03-16  4:49 ` [PATCH v4 2/4] sched/fair: Make CFS bandwidth controller burstable Huaixin Chang
2021-03-16  9:52   ` Peter Zijlstra
2021-03-16  4:49 ` [PATCH v4 3/4] sched/fair: Add cfs bandwidth burst statistics Huaixin Chang
2021-03-16  4:49 ` [PATCH v4 4/4] sched/fair: Add document for burstable CFS bandwidth control Huaixin Chang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YFCLI79Tm0i8uaxH@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bsegall@google.com \
    --cc=changhuaixin@linux.alibaba.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=khlebnikov@yandex-team.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=odin@uged.al \
    --cc=odin@ugedal.com \
    --cc=pauld@redhead.com \
    --cc=pjt@google.com \
    --cc=rostedt@goodmis.org \
    --cc=shanpeic@linux.alibaba.com \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).