* Re: [PATCH] sched/fair: don't push cfs_bandwith slack timers forward
       [not found] <xm26ef47yeyh.fsf@bsegall-linux.svl.corp.google.com>
@ 2019-06-06 14:11 ` Xunlei Pang
  2019-06-06 17:21   ` [PATCH v2] " bsegall
  0 siblings, 1 reply; 10+ messages in thread
From: Xunlei Pang @ 2019-06-06 14:11 UTC
  To: bsegall, linux-kernel, Peter Zijlstra; +Cc: Ingo Molnar, Phil Auld

On 2019/6/6 AM 4:06, bsegall@google.com wrote:
> When a cfs_rq sleeps and returns its quota, we delay for 5ms before
> waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
> sleep, as this has has to be done outside of the rq lock we hold.

two "has".

> 
> The current code waits for 5ms without any sleeps, instead of waiting
> for 5ms from the first sleep, which can delay the unthrottle more than
> we want. Switch this around so that we can't push this forward forever.
> 
> This requires an extra flag rather than using hrtimer_active, since we
> need to start a new timer if the current one is in the process of
> finishing.
> 
> Signed-off-by: Ben Segall <bsegall@google.com>
> ---

We've also suffered from this performance issue recently:
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>

>  kernel/sched/fair.c  | 7 +++++++
>  kernel/sched/sched.h | 1 +
>  2 files changed, 8 insertions(+)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8213ff6e365d..2ead252cfa32 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
>  	if (runtime_refresh_within(cfs_b, min_left))
>  		return;
>  
> +	/* don't push forwards an existing deferred unthrottle */
> +	if (cfs_b->slack_started)
> +		return;
> +	cfs_b->slack_started = true;
> +
>  	hrtimer_start(&cfs_b->slack_timer,
>  			ns_to_ktime(cfs_bandwidth_slack_period),
>  			HRTIMER_MODE_REL);
> @@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
>  
>  	/* confirm we're still not at a refresh boundary */
>  	raw_spin_lock_irqsave(&cfs_b->lock, flags);
> +	cfs_b->slack_started = false;
>  	if (cfs_b->distribute_running) {
>  		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  		return;
> @@ -4920,6 +4926,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
>  	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	cfs_b->slack_timer.function = sched_cfs_slack_timer;
>  	cfs_b->distribute_running = 0;
> +	cfs_b->slack_started = false;
>  }
>  
>  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index efa686eeff26..60219acda94b 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -356,6 +356,7 @@ struct cfs_bandwidth {
>  	u64			throttled_time;
>  
>  	bool                    distribute_running;
> +	bool                    slack_started;
>  #endif
>  };
>  
> 

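To make the failure mode in the changelog concrete, here is an illustrative
timeline (a sketch; the timings are invented, and the 5ms window is
cfs_bandwidth_slack_period):

/*
 * Before the patch, every call to start_cfs_slack_bandwidth() re-armed
 * the slack timer via hrtimer_start(), pushing the expiry forward:
 *
 *   t = 0ms   cfs_rq A sleeps, returns quota -> timer armed for t = 5ms
 *   t = 4ms   cfs_rq B sleeps, returns quota -> timer pushed to t = 9ms
 *   t = 8ms   cfs_rq C sleeps, returns quota -> timer pushed to t = 13ms
 *   ...
 *
 * As long as some cfs_rq sleeps at least once per 5ms window, the
 * deferred unthrottle never fires.  With slack_started, the first arm
 * wins, and throttled cfs_rqs wait at most ~5ms after the first sleep.
 */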

* [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-06 14:11 ` [PATCH] sched/fair: don't push cfs_bandwith slack timers forward Xunlei Pang
@ 2019-06-06 17:21   ` bsegall
  2019-06-11 13:04     ` Phil Auld
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: bsegall @ 2019-06-06 17:21 UTC
  To: linux-kernel; +Cc: Xunlei Pang, Peter Zijlstra, Ingo Molnar, Phil Auld

When a cfs_rq sleeps and returns its quota, we delay for 5ms before
waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
sleep, as this has to be done outside of the rq lock we hold.

The current code waits for 5ms without any sleeps, instead of waiting
for 5ms from the first sleep, which can delay the unthrottle more than
we want. Switch this around so that we can't push this forward forever.

This requires an extra flag rather than using hrtimer_active, since we
need to start a new timer if the current one is in the process of
finishing.

Signed-off-by: Ben Segall <bsegall@google.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
---
 kernel/sched/fair.c  | 7 +++++++
 kernel/sched/sched.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8213ff6e365d..2ead252cfa32 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
 	if (runtime_refresh_within(cfs_b, min_left))
 		return;
 
+	/* don't push forwards an existing deferred unthrottle */
+	if (cfs_b->slack_started)
+		return;
+	cfs_b->slack_started = true;
+
 	hrtimer_start(&cfs_b->slack_timer,
 			ns_to_ktime(cfs_bandwidth_slack_period),
 			HRTIMER_MODE_REL);
@@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 
 	/* confirm we're still not at a refresh boundary */
 	raw_spin_lock_irqsave(&cfs_b->lock, flags);
+	cfs_b->slack_started = false;
 	if (cfs_b->distribute_running) {
 		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
@@ -4920,6 +4926,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 	cfs_b->distribute_running = 0;
+	cfs_b->slack_started = false;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efa686eeff26..60219acda94b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -356,6 +356,7 @@ struct cfs_bandwidth {
 	u64			throttled_time;
 
 	bool                    distribute_running;
+	bool                    slack_started;
 #endif
 };
 
-- 
2.22.0.rc1.257.g3120a18244-goog


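The changelog's point about hrtimer_active() is worth spelling out. A
simplified sketch of the window (not kernel code; the interleaving shown is
a hypothetical illustration):

/*
 * Suppose start_cfs_slack_bandwidth() used hrtimer_active() as its
 * "already armed" check:
 *
 *   slack timer callback                  cfs_rq returning quota
 *   --------------------                  ----------------------
 *   do_sched_cfs_slack_timer()
 *     lock cfs_b->lock
 *     distribute remaining runtime
 *     unlock cfs_b->lock
 *                                         start_cfs_slack_bandwidth()
 *                                           hrtimer_active() == true
 *                                           (callback still running)
 *                                           -> timer not re-armed
 *   callback returns, timer goes idle
 *
 * The quota returned on the right is stranded until the period timer.
 * Clearing cfs_b->slack_started under cfs_b->lock at the top of the
 * callback closes the window: a concurrent quota return either beats
 * the callback (and the callback distributes its runtime) or sees the
 * flag cleared and arms a fresh timer.
 */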

* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-06 17:21   ` [PATCH v2] " bsegall
@ 2019-06-11 13:04     ` Phil Auld
  2019-06-11 13:50       ` Peter Zijlstra
  2019-06-11 13:53     ` Peter Zijlstra
  2019-06-17 14:22     ` [tip:sched/core] sched/fair: Don't " tip-bot for bsegall@google.com
  2 siblings, 1 reply; 10+ messages in thread
From: Phil Auld @ 2019-06-11 13:04 UTC
  To: bsegall; +Cc: linux-kernel, Xunlei Pang, Peter Zijlstra, Ingo Molnar

On Thu, Jun 06, 2019 at 10:21:01AM -0700 bsegall@google.com wrote:
> When a cfs_rq sleeps and returns its quota, we delay for 5ms before
> waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
> sleep, as this has to be done outside of the rq lock we hold.
> 
> The current code waits for 5ms without any sleeps, instead of waiting
> for 5ms from the first sleep, which can delay the unthrottle more than
> we want. Switch this around so that we can't push this forward forever.
> 
> This requires an extra flag rather than using hrtimer_active, since we
> need to start a new timer if the current one is in the process of
> finishing.
> 
> Signed-off-by: Ben Segall <bsegall@google.com>
> Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
> ---
>  kernel/sched/fair.c  | 7 +++++++
>  kernel/sched/sched.h | 1 +
>  2 files changed, 8 insertions(+)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8213ff6e365d..2ead252cfa32 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
>  	if (runtime_refresh_within(cfs_b, min_left))
>  		return;
>  
> +	/* don't push forwards an existing deferred unthrottle */
> +	if (cfs_b->slack_started)
> +		return;
> +	cfs_b->slack_started = true;
> +
>  	hrtimer_start(&cfs_b->slack_timer,
>  			ns_to_ktime(cfs_bandwidth_slack_period),
>  			HRTIMER_MODE_REL);
> @@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
>  
>  	/* confirm we're still not at a refresh boundary */
>  	raw_spin_lock_irqsave(&cfs_b->lock, flags);
> +	cfs_b->slack_started = false;
>  	if (cfs_b->distribute_running) {
>  		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  		return;
> @@ -4920,6 +4926,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
>  	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>  	cfs_b->slack_timer.function = sched_cfs_slack_timer;
>  	cfs_b->distribute_running = 0;
> +	cfs_b->slack_started = false;
>  }
>  
>  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index efa686eeff26..60219acda94b 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -356,6 +356,7 @@ struct cfs_bandwidth {
>  	u64			throttled_time;
>  
>  	bool                    distribute_running;
> +	bool                    slack_started;
>  #endif
>  };
>  
> -- 
> 2.22.0.rc1.257.g3120a18244-goog
> 

I think this looks good. I like not delaying that further even if it
does not fix Dave's use case. 

It does make it glaring that I should have used false/true for setting
distribute_running though :)


Acked-by: Phil Auld <pauld@redhat.com>

-- 


* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-11 13:04     ` Phil Auld
@ 2019-06-11 13:50       ` Peter Zijlstra
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2019-06-11 13:50 UTC
  To: Phil Auld; +Cc: bsegall, linux-kernel, Xunlei Pang, Ingo Molnar

On Tue, Jun 11, 2019 at 09:04:17AM -0400, Phil Auld wrote:
> On Thu, Jun 06, 2019 at 10:21:01AM -0700 bsegall@google.com wrote:
> > When a cfs_rq sleeps and returns its quota, we delay for 5ms before
> > waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
> > sleep, as this has to be done outside of the rq lock we hold.
> > 
> > The current code waits for 5ms without any sleeps, instead of waiting
> > for 5ms from the first sleep, which can delay the unthrottle more than
> > we want. Switch this around so that we can't push this forward forever.
> > 
> > This requires an extra flag rather than using hrtimer_active, since we
> > need to start a new timer if the current one is in the process of
> > finishing.
> > 
> > Signed-off-by: Ben Segall <bsegall@google.com>
> > Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
> > ---
> >  kernel/sched/fair.c  | 7 +++++++
> >  kernel/sched/sched.h | 1 +
> >  2 files changed, 8 insertions(+)
> > 
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 8213ff6e365d..2ead252cfa32 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
> >  	if (runtime_refresh_within(cfs_b, min_left))
> >  		return;
> >  
> > +	/* don't push forwards an existing deferred unthrottle */
> > +	if (cfs_b->slack_started)
> > +		return;
> > +	cfs_b->slack_started = true;
> > +
> >  	hrtimer_start(&cfs_b->slack_timer,
> >  			ns_to_ktime(cfs_bandwidth_slack_period),
> >  			HRTIMER_MODE_REL);
> > @@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
> >  
> >  	/* confirm we're still not at a refresh boundary */
> >  	raw_spin_lock_irqsave(&cfs_b->lock, flags);
> > +	cfs_b->slack_started = false;
> >  	if (cfs_b->distribute_running) {
> >  		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
> >  		return;
> > @@ -4920,6 +4926,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
> >  	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >  	cfs_b->slack_timer.function = sched_cfs_slack_timer;
> >  	cfs_b->distribute_running = 0;
> > +	cfs_b->slack_started = false;
> >  }
> >  
> >  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index efa686eeff26..60219acda94b 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -356,6 +356,7 @@ struct cfs_bandwidth {
> >  	u64			throttled_time;
> >  
> >  	bool                    distribute_running;
> > +	bool                    slack_started;
> >  #endif
> >  };
> >  
> > -- 
> > 2.22.0.rc1.257.g3120a18244-goog
> > 
> 
> I think this looks good. I like not delaying that further even if it
> does not fix Dave's use case. 
> 
> It does make it glaring that I should have used false/true for setting
> distribute_running though :)
> 
> 
> Acked-by: Phil Auld <pauld@redhat.com>

Thanks!

Should this patch have a Fixes: tag?


* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-06 17:21   ` [PATCH v2] " bsegall
  2019-06-11 13:04     ` Phil Auld
@ 2019-06-11 13:53     ` Peter Zijlstra
  2019-06-11 14:12       ` Phil Auld
  2019-06-11 17:26       ` bsegall
  2019-06-17 14:22     ` [tip:sched/core] sched/fair: Don't " tip-bot for bsegall@google.com
  2 siblings, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2019-06-11 13:53 UTC
  To: bsegall; +Cc: linux-kernel, Xunlei Pang, Ingo Molnar, Phil Auld

On Thu, Jun 06, 2019 at 10:21:01AM -0700, bsegall@google.com wrote:
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index efa686eeff26..60219acda94b 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -356,6 +356,7 @@ struct cfs_bandwidth {
>  	u64			throttled_time;
>  
>  	bool                    distribute_running;
> +	bool                    slack_started;
>  #endif
>  };

I'm thinking we can do this instead? afaict both idle and period_active are
already effectively booleans and don't need the full 16 bits.

--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -338,8 +338,10 @@ struct cfs_bandwidth {
 	u64			runtime_expires;
 	int			expires_seq;
 
-	short			idle;
-	short			period_active;
+	u8			idle;
+	u8			period_active;
+	u8			distribute_running;
+	u8			slack_started;
 	struct hrtimer		period_timer;
 	struct hrtimer		slack_timer;
 	struct list_head	throttled_cfs_rq;
@@ -348,9 +350,6 @@ struct cfs_bandwidth {
 	int			nr_periods;
 	int			nr_throttled;
 	u64			throttled_time;
-
-	bool                    distribute_running;
-	bool                    slack_started;
 #endif
 };
 

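
A userspace sketch of the space argument (the struct names here are local
stand-ins, not the kernel layout, and sizes assume a typical ABI where
short is 2 bytes and _Bool is 1):

#include <stdio.h>
#include <stdint.h>

typedef uint8_t u8;

/* just the flag fields of the two layouts being compared */
struct flags_short {
	short	idle;
	short	period_active;
	_Bool	distribute_running;
	_Bool	slack_started;
};

struct flags_u8 {
	u8	idle;
	u8	period_active;
	u8	distribute_running;
	u8	slack_started;
};

int main(void)
{
	printf("shorts + bools: %zu bytes\n", sizeof(struct flags_short)); /* typically 6 */
	printf("four u8s:       %zu bytes\n", sizeof(struct flags_u8));    /* typically 4 */
	return 0;
}

In cfs_bandwidth itself the four u8 flags fit in the 4 bytes the two shorts
already occupied, and the two stray bools (plus the tail padding they left
after throttled_time) go away.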

* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-11 13:53     ` Peter Zijlstra
@ 2019-06-11 14:12       ` Phil Auld
  2019-06-11 14:24         ` Peter Zijlstra
  2019-06-11 17:26       ` bsegall
  1 sibling, 1 reply; 10+ messages in thread
From: Phil Auld @ 2019-06-11 14:12 UTC
  To: Peter Zijlstra; +Cc: bsegall, linux-kernel, Xunlei Pang, Ingo Molnar

On Tue, Jun 11, 2019 at 03:53:25PM +0200 Peter Zijlstra wrote:
> On Thu, Jun 06, 2019 at 10:21:01AM -0700, bsegall@google.com wrote:
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index efa686eeff26..60219acda94b 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -356,6 +356,7 @@ struct cfs_bandwidth {
> >  	u64			throttled_time;
> >  
> >  	bool                    distribute_running;
> > +	bool                    slack_started;
> >  #endif
> >  };
> 
> I'm thinking we can do this instead? afaict both idle and period_active are
> already effectively booleans and don't need the full 16 bits.
> 
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -338,8 +338,10 @@ struct cfs_bandwidth {
>  	u64			runtime_expires;
>  	int			expires_seq;
>  
> -	short			idle;
> -	short			period_active;
> +	u8			idle;
> +	u8			period_active;
> +	u8			distribute_running;
> +	u8			slack_started;
>  	struct hrtimer		period_timer;
>  	struct hrtimer		slack_timer;
>  	struct list_head	throttled_cfs_rq;
> @@ -348,9 +350,6 @@ struct cfs_bandwidth {
>  	int			nr_periods;
>  	int			nr_throttled;
>  	u64			throttled_time;
> -
> -	bool                    distribute_running;
> -	bool                    slack_started;
>  #endif
>  };
>  


That looks reasonable to me. 

Out of curiosity, why not bool? Is sizeof bool architecture dependent?

-- 


* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-11 14:12       ` Phil Auld
@ 2019-06-11 14:24         ` Peter Zijlstra
  2019-06-11 15:06           ` Phil Auld
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2019-06-11 14:24 UTC
  To: Phil Auld; +Cc: bsegall, linux-kernel, Xunlei Pang, Ingo Molnar

On Tue, Jun 11, 2019 at 10:12:19AM -0400, Phil Auld wrote:

> That looks reasonable to me. 
> 
> Out of curiosity, why not bool? Is sizeof bool architecture dependent?

Yeah, sizeof(_Bool) is unspecified and depends on the ABI. It is usually 1,
but there are known cases where it is 4.

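A quick way to check this on a given toolchain (illustrative; the 4-byte
case has shown up on e.g. some older PowerPC Darwin ABIs):

#include <stdio.h>

int main(void)
{
	/* ABI-dependent: usually 1 byte, but not guaranteed by C */
	printf("sizeof(_Bool) = %zu\n", sizeof(_Bool));
	return 0;
}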

* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-11 14:24         ` Peter Zijlstra
@ 2019-06-11 15:06           ` Phil Auld
  0 siblings, 0 replies; 10+ messages in thread
From: Phil Auld @ 2019-06-11 15:06 UTC
  To: Peter Zijlstra; +Cc: bsegall, linux-kernel, Xunlei Pang, Ingo Molnar

On Tue, Jun 11, 2019 at 04:24:43PM +0200 Peter Zijlstra wrote:
> On Tue, Jun 11, 2019 at 10:12:19AM -0400, Phil Auld wrote:
> 
> > That looks reasonable to me. 
> > 
> > Out of curiosity, why not bool? Is sizeof bool architecture dependent?
> 
> Yeah, sizeof(_Bool) is unspecified and depends on the ABI. It is usually 1,
> but there are known cases where it is 4.

Makes sense. Thanks!

-- 


* Re: [PATCH v2] sched/fair: don't push cfs_bandwith slack timers forward
  2019-06-11 13:53     ` Peter Zijlstra
  2019-06-11 14:12       ` Phil Auld
@ 2019-06-11 17:26       ` bsegall
  1 sibling, 0 replies; 10+ messages in thread
From: bsegall @ 2019-06-11 17:26 UTC
  To: Peter Zijlstra; +Cc: linux-kernel, Xunlei Pang, Ingo Molnar, Phil Auld

Peter Zijlstra <peterz@infradead.org> writes:

> On Thu, Jun 06, 2019 at 10:21:01AM -0700, bsegall@google.com wrote:
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index efa686eeff26..60219acda94b 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -356,6 +356,7 @@ struct cfs_bandwidth {
>>  	u64			throttled_time;
>>  
>>  	bool                    distribute_running;
>> +	bool                    slack_started;
>>  #endif
>>  };
>
> I'm thinking we can do this instead? afaict both idle and period_active are
> already effectively booleans and don't need the full 16 bits.
>
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -338,8 +338,10 @@ struct cfs_bandwidth {
>  	u64			runtime_expires;
>  	int			expires_seq;
>  
> -	short			idle;
> -	short			period_active;
> +	u8			idle;
> +	u8			period_active;
> +	u8			distribute_running;
> +	u8			slack_started;
>  	struct hrtimer		period_timer;
>  	struct hrtimer		slack_timer;
>  	struct list_head	throttled_cfs_rq;
> @@ -348,9 +350,6 @@ struct cfs_bandwidth {
>  	int			nr_periods;
>  	int			nr_throttled;
>  	u64			throttled_time;
> -
> -	bool                    distribute_running;
> -	bool                    slack_started;
>  #endif
>  };
>  


Yeah, that makes sense to me. Should I spin up another version of the
patch doing this too, or do you have it from here?


* [tip:sched/core] sched/fair: Don't push cfs_bandwith slack timers forward
  2019-06-06 17:21   ` [PATCH v2] " bsegall
  2019-06-11 13:04     ` Phil Auld
  2019-06-11 13:53     ` Peter Zijlstra
@ 2019-06-17 14:22     ` tip-bot for bsegall@google.com
  2 siblings, 0 replies; 10+ messages in thread
From: tip-bot for bsegall@google.com @ 2019-06-17 14:22 UTC
  To: linux-tip-commits
  Cc: peterz, linux-kernel, torvalds, tglx, hpa, mingo, bsegall, pauld, xlpang

Commit-ID:  66567fcbaecac455caa1b13643155d686b51ce63
Gitweb:     https://git.kernel.org/tip/66567fcbaecac455caa1b13643155d686b51ce63
Author:     bsegall@google.com <bsegall@google.com>
AuthorDate: Thu, 6 Jun 2019 10:21:01 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 17 Jun 2019 12:16:01 +0200

sched/fair: Don't push cfs_bandwith slack timers forward

When a cfs_rq sleeps and returns its quota, we delay for 5ms before
waking any throttled cfs_rqs to coalesce with other cfs_rqs going to
sleep, as this has to be done outside of the rq lock we hold.

The current code waits for 5ms without any sleeps, instead of waiting
for 5ms from the first sleep, which can delay the unthrottle more than
we want. Switch this around so that we can't push this forward forever.

This requires an extra flag rather than using hrtimer_active, since we
need to start a new timer if the current one is in the process of
finishing.

Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
Acked-by: Phil Auld <pauld@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/xm26a7euy6iq.fsf_-_@bsegall-linux.svl.corp.google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c  | 7 +++++++
 kernel/sched/sched.h | 8 ++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4c8f45ed093c..3c11dcdedcbc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4729,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
 	if (runtime_refresh_within(cfs_b, min_left))
 		return;
 
+	/* don't push forwards an existing deferred unthrottle */
+	if (cfs_b->slack_started)
+		return;
+	cfs_b->slack_started = true;
+
 	hrtimer_start(&cfs_b->slack_timer,
 			ns_to_ktime(cfs_bandwidth_slack_period),
 			HRTIMER_MODE_REL);
@@ -4782,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 
 	/* confirm we're still not at a refresh boundary */
 	raw_spin_lock_irqsave(&cfs_b->lock, flags);
+	cfs_b->slack_started = false;
 	if (cfs_b->distribute_running) {
 		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 		return;
@@ -4945,6 +4951,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
 	cfs_b->distribute_running = 0;
+	cfs_b->slack_started = false;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 607859a18b2a..b08dee29ef5e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -338,8 +338,10 @@ struct cfs_bandwidth {
 	u64			runtime_expires;
 	int			expires_seq;
 
-	short			idle;
-	short			period_active;
+	u8			idle;
+	u8			period_active;
+	u8			distribute_running;
+	u8			slack_started;
 	struct hrtimer		period_timer;
 	struct hrtimer		slack_timer;
 	struct list_head	throttled_cfs_rq;
@@ -348,8 +350,6 @@ struct cfs_bandwidth {
 	int			nr_periods;
 	int			nr_throttled;
 	u64			throttled_time;
-
-	bool                    distribute_running;
 #endif
 };
 


