All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task
@ 2013-02-12 13:23 Vincent Guittot
  2013-02-12 13:29 ` Vincent Guittot
  2013-02-12 14:53 ` Steven Rostedt
  0 siblings, 2 replies; 4+ messages in thread
From: Vincent Guittot @ 2013-02-12 13:23 UTC (permalink / raw)
  To: linux-kernel, linaro-dev, peterz, mingo, fweisbec, rostedt, efault
  Cc: Vincent Guittot

When an RT task is scheduled on an idle CPU, the rq's load is not updated
because the CFS functions are not called. Then idle_balance, which is
called just before entering the idle function, updates the rq's load and
assumes that all of the time elapsed since the last update was running
time.

As a result, the rq's load of a CPU that only runs a periodic RT task is
close to LOAD_AVG_MAX, whatever the running duration of the RT task is.

A new idle_exit function is called when the prev task is the idle task,
so that the elapsed time is accounted as idle time in the rq's load.

Changes since V1:
- move code out of schedule function and create a pre_schedule callback for
  idle class instead.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c      |   10 ++++++++++
 kernel/sched/idle_task.c |    7 +++++++
 kernel/sched/sched.h     |    5 +++++
 3 files changed, 22 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81fa536..60951f1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1562,6 +1562,16 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
 	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
 }
+
+/*
+ * Update the rq's load with the elapsed idle time before a task is
+ * scheduled. If the newly scheduled task is not a CFS task, idle_exit will
+ * be the only way to update the runnable statistic.
+ */
+void idle_exit(int this_cpu, struct rq *this_rq)
+{
+	update_rq_runnable_avg(this_rq, 0);
+}
 #else
 static inline void update_entity_load_avg(struct sched_entity *se,
 					  int update_cfs_rq) {}
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index b6baf37..27cd379 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -13,6 +13,12 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
+
+static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
+{
+	/* Update rq's load with elapsed idle time */
+	idle_exit(smp_processor_id(), rq);
+}
 #endif /* CONFIG_SMP */
 /*
  * Idle tasks are unconditionally rescheduled:
@@ -86,6 +92,7 @@ const struct sched_class idle_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_idle,
+	.pre_schedule		= pre_schedule_idle,
 #endif
 
 	.set_curr_task          = set_curr_task_idle,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc88644..9707092 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -877,6 +877,7 @@ extern const struct sched_class idle_sched_class;
 
 extern void trigger_load_balance(struct rq *rq, int cpu);
 extern void idle_balance(int this_cpu, struct rq *this_rq);
+extern void idle_exit(int this_cpu, struct rq *this_rq);
 
 #else	/* CONFIG_SMP */
 
@@ -884,6 +885,10 @@ static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
 
+static inline void idle_exit(int this_cpu, struct rq *this_rq)
+{
+}
+
 #endif
 
 extern void sysrq_sched_debug_show(void);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task
  2013-02-12 13:23 [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task Vincent Guittot
@ 2013-02-12 13:29 ` Vincent Guittot
  2013-02-12 14:53 ` Steven Rostedt
  1 sibling, 0 replies; 4+ messages in thread
From: Vincent Guittot @ 2013-02-12 13:29 UTC (permalink / raw)
  To: rostedt; +Cc: linux-kernel, linaro-dev, peterz, mingo, fweisbec, efault

On 12 February 2013 14:23, Vincent Guittot <vincent.guittot@linaro.org> wrote:
> When a RT task is scheduled on an idle CPU, the update of the rq's load is
> not done because CFS's functions are not called. Then, the idle_balance,
> which is called just before entering the idle function, updates the
> rq's load and makes the assumption that the elapsed time since the last
> update, was only running time.
>
> The rq's load of a CPU that only runs a periodic RT task, is close to
> LOAD_AVG_MAX whatever the running duration of the RT task is.
>
> A new idle_exit function is called when the prev task is the idle function
> so the elapsed time will be accounted as idle time in the rq's load.
>
> Changes since V1:
> - move code out of schedule function and create a pre_schedule callback for
>   idle class instead.

Hi Steve,

I have pushed a new version of my patch to get comments on the
proposed solution, but I will rebase it on top of your work when it is
available.

Vincent

>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  kernel/sched/fair.c      |   10 ++++++++++
>  kernel/sched/idle_task.c |    7 +++++++
>  kernel/sched/sched.h     |    5 +++++
>  3 files changed, 22 insertions(+)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 81fa536..60951f1 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1562,6 +1562,16 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
>                 se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
>         } /* migrations, e.g. sleep=0 leave decay_count == 0 */
>  }
> +
> +/*
> + * Update the rq's load with the elapsed idle time before a task is
> + * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
> + * be the only way to update the runnable statistic.
> + */
> +void idle_exit(int this_cpu, struct rq *this_rq)
> +{
> +       update_rq_runnable_avg(this_rq, 0);
> +}
>  #else
>  static inline void update_entity_load_avg(struct sched_entity *se,
>                                           int update_cfs_rq) {}
> diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
> index b6baf37..27cd379 100644
> --- a/kernel/sched/idle_task.c
> +++ b/kernel/sched/idle_task.c
> @@ -13,6 +13,12 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
>  {
>         return task_cpu(p); /* IDLE tasks as never migrated */
>  }
> +
> +static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
> +{
> +       /* Update rq's load with elapsed idle time */
> +       idle_exit(smp_processor_id(), rq);
> +}
>  #endif /* CONFIG_SMP */
>  /*
>   * Idle tasks are unconditionally rescheduled:
> @@ -86,6 +92,7 @@ const struct sched_class idle_sched_class = {
>
>  #ifdef CONFIG_SMP
>         .select_task_rq         = select_task_rq_idle,
> +       .pre_schedule           = pre_schedule_idle,
>  #endif
>
>         .set_curr_task          = set_curr_task_idle,
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index fc88644..9707092 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -877,6 +877,7 @@ extern const struct sched_class idle_sched_class;
>
>  extern void trigger_load_balance(struct rq *rq, int cpu);
>  extern void idle_balance(int this_cpu, struct rq *this_rq);
> +extern void idle_exit(int this_cpu, struct rq *this_rq);
>
>  #else  /* CONFIG_SMP */
>
> @@ -884,6 +885,10 @@ static inline void idle_balance(int cpu, struct rq *rq)
>  {
>  }
>
> +static inline void idle_exit(int this_cpu, struct rq *this_rq)
> +{
> +}
> +
>  #endif
>
>  extern void sysrq_sched_debug_show(void);
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task
  2013-02-12 13:23 [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task Vincent Guittot
  2013-02-12 13:29 ` Vincent Guittot
@ 2013-02-12 14:53 ` Steven Rostedt
  2013-02-12 15:06   ` Vincent Guittot
  1 sibling, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2013-02-12 14:53 UTC (permalink / raw)
  To: Vincent Guittot; +Cc: linux-kernel, linaro-dev, peterz, mingo, fweisbec, efault

On Tue, 2013-02-12 at 14:23 +0100, Vincent Guittot wrote:
> 	.set_curr_task          = set_curr_task_idle,
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index fc88644..9707092 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -877,6 +877,7 @@ extern const struct sched_class idle_sched_class;
>  
>  extern void trigger_load_balance(struct rq *rq, int cpu);
>  extern void idle_balance(int this_cpu, struct rq *this_rq);
> +extern void idle_exit(int this_cpu, struct rq *this_rq);
>  
>  #else	/* CONFIG_SMP */
>  
> @@ -884,6 +885,10 @@ static inline void idle_balance(int cpu, struct rq *rq)
>  {
>  }
>  
> +static inline void idle_exit(int this_cpu, struct rq *this_rq)
> +{
> +}
> +

Is this part needed? I don't see it ever called when !CONFIG_SMP.

-- Steve

>  #endif
>  
>  extern void sysrq_sched_debug_show(void);



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task
  2013-02-12 14:53 ` Steven Rostedt
@ 2013-02-12 15:06   ` Vincent Guittot
  0 siblings, 0 replies; 4+ messages in thread
From: Vincent Guittot @ 2013-02-12 15:06 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, linaro-dev, peterz, mingo, fweisbec, efault

On 12 February 2013 15:53, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Tue, 2013-02-12 at 14:23 +0100, Vincent Guittot wrote:
>>       .set_curr_task          = set_curr_task_idle,
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index fc88644..9707092 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -877,6 +877,7 @@ extern const struct sched_class idle_sched_class;
>>
>>  extern void trigger_load_balance(struct rq *rq, int cpu);
>>  extern void idle_balance(int this_cpu, struct rq *this_rq);
>> +extern void idle_exit(int this_cpu, struct rq *this_rq);
>>
>>  #else        /* CONFIG_SMP */
>>
>> @@ -884,6 +885,10 @@ static inline void idle_balance(int cpu, struct rq *rq)
>>  {
>>  }
>>
>> +static inline void idle_exit(int this_cpu, struct rq *this_rq)
>> +{
>> +}
>> +
>
> Is this part needed? I don't see it ever called when !CONFIG_SMP.

No, I forgot to remove it.

Vincent
>
> -- Steve
>
>>  #endif
>>
>>  extern void sysrq_sched_debug_show(void);
>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-02-12 15:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-12 13:23 [PATCH v2] sched: fix wrong rq's runnable_avg update with rt task Vincent Guittot
2013-02-12 13:29 ` Vincent Guittot
2013-02-12 14:53 ` Steven Rostedt
2013-02-12 15:06   ` Vincent Guittot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.