Date: Mon, 1 May 2017 18:59:39 +0200
From: Peter Zijlstra
To: Tejun Heo
Cc: Ingo Molnar, linux-kernel@vger.kernel.org, Linus Torvalds,
    Mike Galbraith, Paul Turner, Chris Mason, kernel-team@fb.com,
    Paul McKenney
Subject: Re: [PATCH 1/2] sched/fair: Use task_groups instead of
    leaf_cfs_rq_list to walk all cfs_rqs
Message-ID: <20170501165939.ujfhr5oi35bozsnr@hirez.programming.kicks-ass.net>
References: <20170426004039.GA3222@wtj.duckdns.org>
In-Reply-To: <20170426004039.GA3222@wtj.duckdns.org>

On Tue, Apr 25, 2017 at 05:40:39PM -0700, Tejun Heo wrote:
> @@ -9355,24 +9366,18 @@ void online_fair_sched_group(struct task
>  void unregister_fair_sched_group(struct task_group *tg)
>  {
>  	unsigned long flags;
> -	struct rq *rq;
>  	int cpu;
> 
>  	for_each_possible_cpu(cpu) {
> +		struct rq *rq = cpu_rq(cpu);
> +		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
> +
>  		if (tg->se[cpu])
>  			remove_entity_load_avg(tg->se[cpu]);
> 
> -		/*
> -		 * Only empty task groups can be destroyed; so we can speculatively
> -		 * check on_list without danger of it being re-added.
> -		 */
> -		if (!tg->cfs_rq[cpu]->on_list)
> -			continue;
> -
> -		rq = cpu_rq(cpu);
> -
>  		raw_spin_lock_irqsave(&rq->lock, flags);
> -		list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
> +		list_del_leaf_cfs_rq(cfs_rq);
> +		cfs_rq->online = 0;
>  		raw_spin_unlock_irqrestore(&rq->lock, flags);
>  	}
>  }

It would be nice to retain that on_list check and avoid taking all
those rq->lock's.

Since, per the previous mail, those online/offline loops are protected
by rq->lock, all we need is to ensure an rcu_sched grace period (GP)
passes between here and sched_free_group().

I think we can do that differently, see below. Then we don't need the
->online thing and can keep using the ->on_list check.

> @@ -9523,11 +9528,11 @@ const struct sched_class fair_sched_clas
>  #ifdef CONFIG_SCHED_DEBUG
>  void print_cfs_stats(struct seq_file *m, int cpu)
>  {
> -	struct cfs_rq *cfs_rq;
> +	struct task_group *tg;
> 
>  	rcu_read_lock();
> -	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
> -		print_cfs_rq(m, cpu, cfs_rq);
> +	list_for_each_entry_rcu(tg, &task_groups, list)
> +		print_cfs_rq(m, cpu, tg->cfs_rq[cpu]);
>  	rcu_read_unlock();
>  }

If you have a gazillion empty groups, do we really want to print them?
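Something like the below. In condensed form, the idea is (a sketch
only, using the helpers the patch introduces; not a compilable unit):

	/* sched_offline_group(): group is unlinked, then snapshot
	 * the rcu_sched grace-period counter: */
	tg->rcu_stamp = get_state_synchronize_sched();

	/* sched_free_group(), possibly much later: */
	if (should_synchronize_sched(tg->rcu_stamp)) {
		/* no GP since the snapshot; defer to an RCU callback */
		call_rcu_sched(&tg->rcu, sched_free_group_rcu_sched);
	} else {
		/* a full GP already passed; immediate free is safe */
		__sched_free_group(tg);
	}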
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 74d9c3a1feee..8bad2c371b18 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -58,6 +58,11 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
+static inline bool should_synchronize_sched(unsigned long oldstate)
+{
+	return true;
+}
+
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0bacb6b2af69..9918b632db27 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -78,6 +78,7 @@ unsigned long get_state_synchronize_rcu(void);
 void cond_synchronize_rcu(unsigned long oldstate);
 unsigned long get_state_synchronize_sched(void);
 void cond_synchronize_sched(unsigned long oldstate);
+bool should_synchronize_sched(unsigned long oldstate);
 
 extern unsigned long rcutorture_testseq;
 extern unsigned long rcutorture_vernum;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 91fff49d5869..59eb0d115217 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3423,6 +3423,18 @@ void cond_synchronize_sched(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 
+bool should_synchronize_sched(unsigned long oldstate)
+{
+	unsigned long newstate;
+
+	/*
+	 * Ensure that this load happens before any RCU-destructive
+	 * actions the caller might carry out after we return.
+	 */
+	newstate = smp_load_acquire(&rcu_sched_state.completed);
+	return ULONG_CMP_GE(oldstate, newstate);
+}
+
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ecad8e2d2a29..d7630b34d197 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6362,7 +6362,7 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
-static void sched_free_group(struct task_group *tg)
+static void __sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -6370,6 +6370,22 @@ static void sched_free_group(struct task_group *tg)
 	kmem_cache_free(task_group_cache, tg);
 }
 
+static void sched_free_group_rcu_sched(struct rcu_head *rhp)
+{
+	/* Now it should be safe to free those cfs_rqs: */
+	__sched_free_group(container_of(rhp, struct task_group, rcu));
+}
+
+static void sched_free_group(struct task_group *tg)
+{
+	if (should_synchronize_sched(tg->rcu_stamp)) {
+		call_rcu_sched(&tg->rcu, sched_free_group_rcu_sched);
+		return;
+	}
+
+	__sched_free_group(tg);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -6434,6 +6450,8 @@ void sched_offline_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
+
+	tg->rcu_stamp = get_state_synchronize_sched();
 }
 
 static void sched_change_group(struct task_struct *tsk, int type)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b2e5e9e64c86..1dbab563e798 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -302,6 +302,7 @@ struct task_group {
 #endif
 
 	struct rcu_head rcu;
+	unsigned long rcu_stamp;
 	struct list_head list;
 
 	struct task_group *parent;
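For reference, ULONG_CMP_GE() is the wraparound-safe counter comparison
from include/linux/rcupdate.h that should_synchronize_sched() relies
on. A stand-alone illustration (plain userspace C, not part of the
patch) of why the check stays correct even when the grace-period
counter wraps:

	#include <stdio.h>
	#include <limits.h>

	/* copied from include/linux/rcupdate.h */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	int main(void)
	{
		/* counter unchanged: no GP elapsed yet, must defer the free */
		printf("%d\n", ULONG_CMP_GE(42UL, 42UL));		/* 1 */

		/* ->completed moved past the snapshot: GP elapsed, free now */
		printf("%d\n", ULONG_CMP_GE(42UL, 44UL));		/* 0 */

		/* still correct across wraparound of the counter */
		printf("%d\n", ULONG_CMP_GE(ULONG_MAX - 1UL, 1UL));	/* 0 */

		return 0;
	}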