From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755425Ab1EPNiA (ORCPT ); Mon, 16 May 2011 09:38:00 -0400 Received: from mail-pz0-f46.google.com ([209.85.210.46]:35664 "EHLO mail-pz0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755270Ab1EPNh6 (ORCPT ); Mon, 16 May 2011 09:37:58 -0400 DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=date:from:to:cc:subject:message-id:reply-to:references:mime-version :content-type:content-disposition:in-reply-to:user-agent; b=Fn0NUx2KqutBraHr+l3qAVCbf83kkp9bAZA/E7KLaGTckr/rbYjyyQE9YxyQDMqFup DRSjktMHhntR1RGTqz/kiLG8SCfP7E7Pnxx+YmK25uEg/tQvYU8+yxZdu6sO22I7kaxU XhpfWGkKg07SRXNkqiW8LAk8f8cz5AlOdoSqY= Date: Mon, 16 May 2011 21:37:48 +0800 From: Yong Zhang To: Peter Zijlstra Cc: KOSAKI Motohiro , Oleg Nesterov , LKML , Andrew Morton , Ingo Molnar , Li Zefan , Miao Xie Subject: Re: [PATCH 1/2] cpuset: fix cpuset_cpus_allowed_fallback() don't update tsk->rt.nr_cpus_allowed Message-ID: <20110516133748.GB2058@zhy> Reply-To: Yong Zhang References: <20110428161149.GA15658@redhat.com> <20110502194416.2D61.A69D9226@jp.fujitsu.com> <20110502195657.2D68.A69D9226@jp.fujitsu.com> <1305129929.2914.247.camel@laptop> <4DCCC61F.80408@jp.fujitsu.com> <1305306135.2466.173.camel@twins> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <1305306135.2466.173.camel@twins> User-Agent: Mutt/1.5.20 (2009-06-14) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Fri, May 13, 2011 at 07:02:15PM +0200, Peter Zijlstra wrote: > On Fri, 2011-05-13 at 14:42 +0800, Yong Zhang wrote: > > > - rcu_cpu_kthread_should_stop() call set_cpus_allowed_ptr() again > > > periodically. > > > then, it can reset cpumask if cpuset_cpus_allowed_fallback() change it. > > > my debug print obseve following cpumask change occur at boot time. 
> > > 1) kthread_bind: bind cpu1 > > > 2) cpuset_cpus_allowed_fallback: bind possible cpu > > > 3) rcu_cpu_kthread_should_stop: rebind cpu1 > > > - while tsk->rt.nr_cpus_allowed == 1, sched load balancer never be crash. > > > > Seems rcu_spawn_one_cpu_kthread() call wake_up_process() directly, > > which is under hotplug event CPU_UP_PREPARE. Maybe it should be > > under CPU_ONLINE. > > IIRC I talked to Paul about this a while back and ONLINE is too late, > however STARTING should work. At the time he couldn't quite get that to > work, but the above situation is indeed the root cause of our problems. > > We shouldn't try to run a cpu affine thread before the cpu in question > is actually able to run stuff. But I'm afraid this patch still doesn't help. If I understand your patch correctly, you have just moved the wake-up to CPU_STARTING, but that is still before CPU_ONLINE. Please check my earlier reply to Paul in this thread. Thanks, Yong > > I did me a little hackery and with the below patch my kernel still > boots... > > Would that sort your issue? 
> > --- > kernel/rcutree.c | 44 ++++++++++++++++++++++++++++++++++++++------ > kernel/rcutree_plugin.h | 1 - > 2 files changed, 38 insertions(+), 7 deletions(-) > > diff --git a/kernel/rcutree.c b/kernel/rcutree.c > index 5616b17..e0218ed 100644 > --- a/kernel/rcutree.c > +++ b/kernel/rcutree.c > @@ -1656,7 +1656,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) > per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; > WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); > per_cpu(rcu_cpu_kthread_task, cpu) = t; > - wake_up_process(t); > sp.sched_priority = RCU_KTHREAD_PRIO; > sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); > return 0; > @@ -1764,13 +1763,33 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, > raw_spin_lock_irqsave(&rnp->lock, flags); > rnp->node_kthread_task = t; > raw_spin_unlock_irqrestore(&rnp->lock, flags); > - wake_up_process(t); > sp.sched_priority = 99; > sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); > } > return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); > } > > +static void __cpuinit rcu_wake_cpu_kthread(int cpu) > +{ > + struct task_struct *p = per_cpu(rcu_cpu_kthread_task, cpu); > + > + if (p) > + wake_up_process(p); > +} > + > +static void __cpuinit rcu_wake_node_kthread(struct rcu_node *rnp) > +{ > + if (!rnp) > + return; > + > + if (rnp->node_kthread_task) > + wake_up_process(rnp->node_kthread_task); > +#ifdef CONFIG_RCU_BOOST > + if (rnp->boost_kthread_task) > + wake_up_process(rnp->boost_kthread_task); > +#endif > +} > + > /* > * Spawn all kthreads -- called as soon as the scheduler is running. 
> */ > @@ -1783,19 +1802,24 @@ static int __init rcu_spawn_kthreads(void) > for_each_possible_cpu(cpu) { > init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); > per_cpu(rcu_cpu_has_work, cpu) = 0; > - if (cpu_online(cpu)) > + if (cpu_online(cpu)) { > (void)rcu_spawn_one_cpu_kthread(cpu); > + rcu_wake_cpu_kthread(cpu); > + } > } > rnp = rcu_get_root(rcu_state); > init_waitqueue_head(&rnp->node_wq); > rcu_init_boost_waitqueue(rnp); > (void)rcu_spawn_one_node_kthread(rcu_state, rnp); > - if (NUM_RCU_NODES > 1) > + rcu_wake_node_kthread(rnp); > + if (NUM_RCU_NODES > 1) { > rcu_for_each_leaf_node(rcu_state, rnp) { > init_waitqueue_head(&rnp->node_wq); > rcu_init_boost_waitqueue(rnp); > (void)rcu_spawn_one_node_kthread(rcu_state, rnp); > + rcu_wake_node_kthread(rnp); > } > + } > return 0; > } > early_initcall(rcu_spawn_kthreads); > @@ -2206,7 +2230,7 @@ static void __cpuinit rcu_online_cpu(int cpu) > rcu_preempt_init_percpu_data(cpu); > } > > -static void __cpuinit rcu_online_kthreads(int cpu) > +static void __cpuinit rcu_prepare_kthreads(int cpu) > { > struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); > struct rcu_node *rnp = rdp->mynode; > @@ -2233,7 +2257,15 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, > case CPU_UP_PREPARE: > case CPU_UP_PREPARE_FROZEN: > rcu_online_cpu(cpu); > - rcu_online_kthreads(cpu); > + rcu_prepare_kthreads(cpu); > + break; > + case CPU_STARTING: > + rcu_wake_cpu_kthread(cpu); > + do { > + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); > + if (rdp) > + rcu_wake_node_kthread(rdp->mynode); > + } while (0); > break; > case CPU_ONLINE: > case CPU_DOWN_FAILED: > diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h > index ed339702..961a316 100644 > --- a/kernel/rcutree_plugin.h > +++ b/kernel/rcutree_plugin.h > @@ -1306,7 +1306,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, > raw_spin_lock_irqsave(&rnp->lock, flags); > rnp->boost_kthread_task = t; > 
raw_spin_unlock_irqrestore(&rnp->lock, flags); > - wake_up_process(t); > sp.sched_priority = RCU_KTHREAD_PRIO; > sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); > return 0;