From: Vincent Guittot

Now that we directly change load_avg and propagate that change into
the sums, sys_nice() and co should do the same, otherwise it's possible
to confuse load accounting when we migrate near the weight change.

[peterz: Changelog, call condition]
[josef: fixed runnable and !SMP compilation]
Signed-off-by: Peter Zijlstra (Intel)
Link: http://lkml.kernel.org/r/20170517095045.GA8420@linaro.org
---
 kernel/sched/core.c  | 22 ++++++++++++-----
 kernel/sched/fair.c  | 63 +++++++++++++++++++++++++++++----------------------
 kernel/sched/sched.h |  2 +
 3 files changed, 54 insertions(+), 33 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -733,7 +733,7 @@ int tg_nop(struct task_group *tg, void *
 }
 #endif
 
-static void set_load_weight(struct task_struct *p)
+static void set_load_weight(struct task_struct *p, bool update_load)
 {
 	int prio = p->static_prio - MAX_RT_PRIO;
 	struct load_weight *load = &p->se.load;
@@ -747,8 +747,16 @@ static void set_load_weight(struct task_
 		return;
 	}
 
-	load->weight = scale_load(sched_prio_to_weight[prio]);
-	load->inv_weight = sched_prio_to_wmult[prio];
+	/*
+	 * SCHED_OTHER tasks have to update their load when changing their
+	 * weight
+	 */
+	if (update_load && p->sched_class == &fair_sched_class) {
+		reweight_task(p, prio);
+	} else {
+		load->weight = scale_load(sched_prio_to_weight[prio]);
+		load->inv_weight = sched_prio_to_wmult[prio];
+	}
 }
 
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2356,7 +2364,7 @@ int sched_fork(unsigned long clone_flags
 		p->static_prio = NICE_TO_PRIO(0);
 
 	p->prio = p->normal_prio = __normal_prio(p);
-	set_load_weight(p);
+	set_load_weight(p, false);
 
 	/*
 	 * We don't need the reset flag anymore after the fork. It has
@@ -3803,7 +3811,7 @@ void set_user_nice(struct task_struct *p
 		put_prev_task(rq, p);
 
 	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p);
+	set_load_weight(p, true);
 	old_prio = p->prio;
 	p->prio = effective_prio(p);
 	delta = p->prio - old_prio;
@@ -3960,7 +3968,7 @@ static void __setscheduler_params(struct
 	 */
 	p->rt_priority = attr->sched_priority;
 	p->normal_prio = normal_prio(p);
-	set_load_weight(p);
+	set_load_weight(p, true);
 }
 
 /* Actually do priority change: must hold pi & rq lock. */
@@ -5910,7 +5918,7 @@ void __init sched_init(void)
 		atomic_set(&rq->nr_iowait, 0);
 	}
 
-	set_load_weight(&init_task);
+	set_load_weight(&init_task, false);
 
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2789,6 +2789,43 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
+static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			    unsigned long weight)
+{
+	if (se->on_rq) {
+		/* commit outstanding execution time */
+		if (cfs_rq->curr == se)
+			update_curr(cfs_rq);
+		account_entity_dequeue(cfs_rq, se);
+		dequeue_runnable_load_avg(cfs_rq, se);
+	}
+	dequeue_load_avg(cfs_rq, se);
+
+	update_load_set(&se->load, weight);
+
+#ifdef CONFIG_SMP
+	se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum,
+				   LOAD_AVG_MAX - 1024 + se->avg.period_contrib);
+#endif
+
+	enqueue_load_avg(cfs_rq, se);
+	if (se->on_rq) {
+		account_entity_enqueue(cfs_rq, se);
+		enqueue_runnable_load_avg(cfs_rq, se);
+	}
+}
+
+void reweight_task(struct task_struct *p, int prio)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct load_weight *load = &se->load;
+	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+
+	reweight_entity(cfs_rq, se, weight);
+	load->inv_weight = sched_prio_to_wmult[prio];
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 # ifdef CONFIG_SMP
 /*
@@ -2892,32 +2929,6 @@ static long calc_cfs_shares(struct cfs_r
 }
 # endif /* CONFIG_SMP */
 
-static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			    unsigned long weight)
-{
-	if (se->on_rq) {
-		/* commit outstanding execution time */
-		if (cfs_rq->curr == se)
-			update_curr(cfs_rq);
-		account_entity_dequeue(cfs_rq, se);
-		dequeue_runnable_load_avg(cfs_rq, se);
-	}
-	dequeue_load_avg(cfs_rq, se);
-
-	update_load_set(&se->load, weight);
-
-#ifdef CONFIG_SMP
-	se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum,
-				   LOAD_AVG_MAX - 1024 + se->avg.period_contrib);
-#endif
-
-	enqueue_load_avg(cfs_rq, se);
-	if (se->on_rq) {
-		account_entity_enqueue(cfs_rq, se);
-		enqueue_runnable_load_avg(cfs_rq, se);
-	}
-}
-
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 
 static void update_cfs_shares(struct sched_entity *se)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1532,6 +1532,8 @@ extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
+extern void reweight_task(struct task_struct *p, int prio);
+
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
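For illustration only, not part of the patch: a minimal, standalone userspace
sketch of the rescaling reweight_entity() applies to se->avg.load_avg when the
weight changes, mirroring the div_u64() expression above. The LOAD_AVG_MAX
value and the sample load_sum/period_contrib numbers are assumptions made for
the sketch; the weights 1024 and 335 correspond to nice 0 and nice 5 in
sched_prio_to_weight[].

/* rescale.c - sketch of the reweight_entity() load_avg rescale */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX	47742	/* assumed: maximum accumulated PELT sum */

static uint64_t rescale_load_avg(uint64_t weight, uint64_t load_sum,
				 uint32_t period_contrib)
{
	/* load_avg = weight * load_sum / (LOAD_AVG_MAX - 1024 + period_contrib) */
	return (weight * load_sum) / (LOAD_AVG_MAX - 1024 + period_contrib);
}

int main(void)
{
	uint64_t load_sum = 20000;	/* hypothetical accumulated sum */
	uint32_t period_contrib = 512;	/* hypothetical partial period */

	printf("load_avg at weight 1024: %llu\n",
	       (unsigned long long)rescale_load_avg(1024, load_sum, period_contrib));
	printf("load_avg at weight  335: %llu\n",
	       (unsigned long long)rescale_load_avg(335, load_sum, period_contrib));
	return 0;
}

Recomputing load_avg from the already accumulated load_sum at the moment of
the weight change is what lets the new weight show up in the averages (and the
cfs_rq sums) immediately, instead of decaying in over the PELT horizon.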