From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750872AbcFATtl (ORCPT ); Wed, 1 Jun 2016 15:49:41 -0400 Received: from merlin.infradead.org ([205.233.59.134]:50007 "EHLO merlin.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750720AbcFATtk (ORCPT ); Wed, 1 Jun 2016 15:49:40 -0400 Date: Wed, 1 Jun 2016 21:49:30 +0200 From: Peter Zijlstra To: Morten Rasmussen Cc: mingo@redhat.com, dietmar.eggemann@arm.com, yuyang.du@intel.com, vincent.guittot@linaro.org, mgalbraith@suse.de, linux-kernel@vger.kernel.org, Rik van Riel Subject: Re: [PATCH 02/16] sched/fair: Consistent use of prev_cpu in wakeup path Message-ID: <20160601194930.GC28447@twins.programming.kicks-ass.net> References: <1464001138-25063-1-git-send-email-morten.rasmussen@arm.com> <1464001138-25063-3-git-send-email-morten.rasmussen@arm.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1464001138-25063-3-git-send-email-morten.rasmussen@arm.com> User-Agent: Mutt/1.5.21 (2012-12-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Mon, May 23, 2016 at 11:58:44AM +0100, Morten Rasmussen wrote: > In commit ac66f5477239 ("sched/numa: Introduce migrate_swap()") > select_task_rq() got a 'cpu' argument to enable overriding of prev_cpu > in special cases (NUMA task swapping). However, the > select_task_rq_fair() helper functions: wake_affine() and > select_idle_sibling(), still use task_cpu(p) directly to work out > prev_cpu which leads to inconsistencies. > > This patch passes prev_cpu (potentially overridden by NUMA code) into > the helper functions to ensure prev_cpu is indeed the same cpu > everywhere in the wakeup path. Rik, can you get this run through the NUMA benchmarks? 
> > cc: Ingo Molnar > cc: Peter Zijlstra > > Signed-off-by: Morten Rasmussen > --- > kernel/sched/fair.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 218f8e8..c49e25a 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -656,7 +656,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) > } > > #ifdef CONFIG_SMP > -static int select_idle_sibling(struct task_struct *p, int cpu); > +static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); > static unsigned long task_h_load(struct task_struct *p); > > /* > @@ -1502,7 +1502,8 @@ static void task_numa_compare(struct task_numa_env *env, > * Call select_idle_sibling to maybe find a better one. > */ > if (!cur) > - env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu); > + env->dst_cpu = select_idle_sibling(env->p, env->src_cpu, > + env->dst_cpu); > > assign: > assigned = true; > @@ -5013,18 +5014,18 @@ static int wake_wide(struct task_struct *p) > return 1; > } > > -static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) > +static int wake_affine(struct sched_domain *sd, struct task_struct *p, > + int prev_cpu, int sync) > { > s64 this_load, load; > s64 this_eff_load, prev_eff_load; > - int idx, this_cpu, prev_cpu; > + int idx, this_cpu; > struct task_group *tg; > unsigned long weight; > int balanced; > > idx = sd->wake_idx; > this_cpu = smp_processor_id(); > - prev_cpu = task_cpu(p); > load = source_load(prev_cpu, idx); > this_load = target_load(this_cpu, idx); > > @@ -5189,11 +5190,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) > /* > * Try and locate an idle CPU in the sched_domain. 
> */ > -static int select_idle_sibling(struct task_struct *p, int target) > +static int select_idle_sibling(struct task_struct *p, int prev, int target) > { > struct sched_domain *sd; > struct sched_group *sg; > - int i = task_cpu(p); > > if (idle_cpu(target)) > return target; > @@ -5201,8 +5201,8 @@ static int select_idle_sibling(struct task_struct *p, int target) > /* > * If the prevous cpu is cache affine and idle, don't be stupid. > */ > - if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) > - return i; > + if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) > + return prev; > > /* > * Otherwise, iterate the domains and find an eligible idle cpu. > @@ -5223,6 +5223,8 @@ static int select_idle_sibling(struct task_struct *p, int target) > for_each_lower_domain(sd) { > sg = sd->groups; > do { > + int i; > + > if (!cpumask_intersects(sched_group_cpus(sg), > tsk_cpus_allowed(p))) > goto next; > @@ -5331,13 +5333,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f > > if (affine_sd) { > sd = NULL; /* Prefer wake_affine over balance flags */ > - if (cpu != prev_cpu && wake_affine(affine_sd, p, sync)) > + if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync)) > new_cpu = cpu; > } > > if (!sd) { > if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ > - new_cpu = select_idle_sibling(p, new_cpu); > + new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); > > } else while (sd) { > struct sched_group *group; > -- > 1.9.1 >