linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Paul McKenney <paulmck@linux.vnet.ibm.com>,
	"Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>,
	Arjan van de Veen <arjan@infradead.org>,
	Paul Turner <pjt@google.com>,
	Richard Weinberger <rw@linutronix.de>,
	Magnus Damm <magnus.damm@gmail.com>
Subject: [patch 10/40] sched: Convert to state machine callbacks
Date: Thu, 31 Jan 2013 12:11:19 -0000	[thread overview]
Message-ID: <20130131120742.160047181@linutronix.de> (raw)
In-Reply-To: 20130131120348.372374706@linutronix.de

[-- Attachment #1: cpuhotplug-sched-convert-active-inactive.patch --]
[-- Type: text/plain, Size: 9305 bytes --]

The scheduler sports quite a bunch of hotplug notifiers. One reason
for multiple notifiers is the fact, that the startup and teardown
process are asymetric. Now the scheduler wants to be called early on
startup and late on teardown. That requires to install two different
notifiers for the same issue.

With the state machine implementation we can register a callback pair
for startup and teardown at the appropriate spot.

This patch converts the notifiers which are setup with special
priorities and combines CPU_PRI_SCHED and CPU_PRI_CPUSET notifiers to
a single callback. They run back to back anyway and we can make sure
in the callbacks that the ordering inside the scheduler is
correct. These notifiers are installed in sched_init_smp() as we can't
run them during the bringup of the non boot cpus because the smp
scheduler is setup after that. It would be nice if we just could
compile them in, but that needs a larger surgery to the scheduler code
and is beyond the scope of this patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h        |   16 ----
 include/linux/cpuhotplug.h |    6 +
 kernel/cpu.c               |    4 +
 kernel/sched/core.c        |  154 +++++++++++++++++----------------------------
 4 files changed, 69 insertions(+), 111 deletions(-)

Index: linux-2.6/include/linux/cpu.h
===================================================================
--- linux-2.6.orig/include/linux/cpu.h
+++ linux-2.6/include/linux/cpu.h
@@ -58,22 +58,6 @@ extern ssize_t arch_print_cpu_modalias(s
  * CPU notifier priorities.
  */
 enum {
-	/*
-	 * SCHED_ACTIVE marks a cpu which is coming up active during
-	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
-	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
-	 * cpu_active mask right after SCHED_ACTIVE.  During
-	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
-	 * ordered in the similar way.
-	 *
-	 * This ordering guarantees consistent cpu_active mask and
-	 * migration behavior to all cpu notifiers.
-	 */
-	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
-	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
-	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
-	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
-
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
Index: linux-2.6/include/linux/cpuhotplug.h
===================================================================
--- linux-2.6.orig/include/linux/cpuhotplug.h
+++ linux-2.6/include/linux/cpuhotplug.h
@@ -6,13 +6,16 @@ enum cpuhp_states {
 	CPUHP_CREATE_THREADS,
 	CPUHP_NOTIFY_PREPARE,
 	CPUHP_NOTIFY_DEAD,
+	CPUHP_SCHED_DEAD,
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_OFFLINE,
+	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_NOTIFY_DYING,
 	CPUHP_AP_MAX,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_PERCPU_THREADS,
+	CPUHP_SCHED_ONLINE,
 	CPUHP_NOTIFY_ONLINE,
 	CPUHP_NOTIFY_DOWN_PREPARE,
 	CPUHP_MAX,
@@ -87,4 +90,7 @@ static inline void cpuhp_remove_state_no
 	__cpuhp_remove_state(state, false);
 }
 
+/* Compiled in scheduler hotplug functions */
+int sched_starting_cpu(unsigned int cpu);
+
 #endif
Index: linux-2.6/kernel/cpu.c
===================================================================
--- linux-2.6.orig/kernel/cpu.c
+++ linux-2.6/kernel/cpu.c
@@ -788,6 +788,10 @@ static struct cpuhp_step cpuhp_bp_states
 /* Application processor state steps */
 static struct cpuhp_step cpuhp_ap_states[] = {
 #ifdef CONFIG_SMP
+	[CPUHP_AP_SCHED_STARTING] = {
+		.startup = sched_starting_cpu,
+		.teardown = NULL,
+	},
 	[CPUHP_AP_NOTIFY_STARTING] = {
 		.startup = notify_starting,
 		.teardown = NULL,
Index: linux-2.6/kernel/sched/core.c
===================================================================
--- linux-2.6.orig/kernel/sched/core.c
+++ linux-2.6/kernel/sched/core.c
@@ -5167,31 +5167,6 @@ static struct notifier_block __cpuinitda
 	.priority = CPU_PRI_MIGRATION,
 };
 
-static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
-				      unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-	case CPU_DOWN_FAILED:
-		set_cpu_active((long)hcpu, true);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
 static int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
@@ -5203,10 +5178,6 @@ static int __init migration_init(void)
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
 
-	/* Register cpu active notifiers */
-	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
 	return 0;
 }
 early_initcall(migration_init);
@@ -6292,42 +6263,12 @@ static void sched_domains_numa_masks_cle
 	}
 }
 
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		sched_domains_numa_masks_set(cpu);
-		break;
-
-	case CPU_DEAD:
-		sched_domains_numa_masks_clear(cpu);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
 #else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	return 0;
-}
+static inline void sched_init_numa(void) { }
+#ifdef CONFIG_HOTPLUG_CPU
+static void sched_domains_numa_masks_set(int cpu) { }
+static void sched_domains_numa_masks_clear(int cpu) { }
+#endif
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -6696,6 +6637,7 @@ match2:
 	mutex_unlock(&sched_domains_mutex);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
 
 /*
@@ -6706,13 +6648,9 @@ static int num_cpus_frozen;	/* used to m
  * If we come here as part of a suspend/resume, don't touch cpusets because we
  * want to restore it back to its original state upon resume anyway.
  */
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
-			     void *hcpu)
+static void cpuset_cpu_active(void)
 {
-	switch (action) {
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED_FROZEN:
-
+	if (cpuhp_tasks_frozen) {
 		/*
 		 * num_cpus_frozen tracks how many CPUs are involved in suspend
 		 * resume sequence. As long as this is not the last online
@@ -6722,40 +6660,62 @@ static int cpuset_cpu_active(struct noti
 		num_cpus_frozen--;
 		if (likely(num_cpus_frozen)) {
 			partition_sched_domains(1, NULL, NULL);
-			break;
+			return;
 		}
-
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
-
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		cpuset_update_active_cpus(true);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	cpuset_update_active_cpus(true);
 }
 
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
-			       void *hcpu)
+static void cpuset_cpu_inactive(void)
 {
-	switch (action) {
-	case CPU_DOWN_PREPARE:
-		cpuset_update_active_cpus(false);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
+	if (cpuhp_tasks_frozen) {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-	return NOTIFY_OK;
+	} else
+		cpuset_update_active_cpus(false);
+}
+
+static int sched_dead_cpu(unsigned int cpu)
+{
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+static int sched_online_cpu(unsigned int cpu)
+{
+	/* Looks redundant, but we need it in case of down canceled */
+	set_cpu_active(cpu, true);
+	/*
+	 * Asymetric to sched_dead_cpu, but this just fiddles with
+	 * bits. Sigh
+	 */
+	sched_domains_numa_masks_set(cpu);
+	/* This is actually symetric */
+	cpuset_cpu_active();
+	return 0;
+}
+
+static int sched_offline_cpu(unsigned int cpu)
+{
+	set_cpu_active(cpu, false);
+	cpuset_cpu_inactive();
+	return 0;
+}
+#else
+#define sched_dead_cpu		NULL
+#define sched_online_cpu	NULL
+#define sched_offline_cpu	NULL
+#endif
+
+int __cpuinit sched_starting_cpu(unsigned int cpu)
+{
+	set_cpu_active(cpu, true);
+	return 0;
 }
 
 void __init sched_init_smp(void)
@@ -6776,9 +6736,13 @@ void __init sched_init_smp(void)
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
 
-	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
+	/*
+	 * Note: These callbacks are installed late because we init
+	 * numa and sched domains after we brought up the cpus.
+	 */
+	cpuhp_setup_state_nocalls(CPUHP_SCHED_DEAD, NULL, sched_dead_cpu);
+	cpuhp_setup_state_nocalls(CPUHP_SCHED_ONLINE, sched_online_cpu,
+				  sched_offline_cpu);
 
 	/* RT runtime code needs to handle some hotplug events */
 	hotcpu_notifier(update_runtime, 0);



  parent reply	other threads:[~2013-01-31 12:11 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-31 15:44 [patch 00/40] CPU hotplug rework - episode I Thomas Gleixner
2013-01-31 12:11 ` [patch 01/40] smpboot: Allow selfparking per cpu threads Thomas Gleixner
2013-02-09  0:29   ` Paul E. McKenney
2013-02-14 17:46   ` [tip:smp/hotplug] " tip-bot for Thomas Gleixner
2013-01-31 12:11 ` [patch 02/40] stop_machine: Store task reference in a separate per cpu variable Thomas Gleixner
2013-02-09  0:33   ` Paul E. McKenney
2013-02-14 17:47   ` [tip:smp/hotplug] " tip-bot for Thomas Gleixner
2013-01-31 12:11 ` [patch 03/40] stop_machine: Use smpboot threads Thomas Gleixner
2013-02-09  0:39   ` Paul E. McKenney
2013-02-14 17:49   ` [tip:smp/hotplug] " tip-bot for Thomas Gleixner
2013-01-31 12:11 ` [patch 04/40] cpu: Restructure FROZEN state handling Thomas Gleixner
2013-02-09  0:52   ` Paul E. McKenney
2014-10-09 16:53   ` Borislav Petkov
2013-01-31 12:11 ` [patch 05/40] cpu: Restructure cpu_down code Thomas Gleixner
2013-02-09  0:49   ` Paul E. McKenney
2014-10-09 17:05   ` Borislav Petkov
2013-01-31 12:11 ` [patch 06/40] cpu: hotplug: Split out cpu down functions Thomas Gleixner
2013-02-09  0:54   ` Paul E. McKenney
2013-01-31 12:11 ` [patch 07/40] cpu: hotplug: Convert to a state machine for the control processor Thomas Gleixner
2013-02-11 20:09   ` Paul E. McKenney
2013-01-31 12:11 ` [patch 08/40] cpu: hotplug: Convert the hotplugged processor work to a state machine Thomas Gleixner
2013-02-11 20:17   ` Paul E. McKenney
2013-01-31 12:11 ` Thomas Gleixner [this message]
2013-02-11 23:46   ` [patch 10/40] sched: Convert to state machine callbacks Paul E. McKenney
2013-01-31 12:11 ` [patch 09/40] cpu: hotplug: Implement setup/removal interface Thomas Gleixner
2013-02-01 13:44   ` Hillf Danton
2013-02-01 13:52     ` Thomas Gleixner
2013-01-31 12:11 ` [patch 11/40] x86: uncore: Move teardown callback to CPU_DEAD Thomas Gleixner
2013-01-31 12:11 ` [patch 12/40] x86: uncore: Convert to hotplug state machine Thomas Gleixner
2013-01-31 12:11 ` [patch 13/40] perf: " Thomas Gleixner
2013-01-31 12:11 ` [patch 14/40] x86: perf: Convert the core to the " Thomas Gleixner
2013-01-31 12:11 ` [patch 16/40] blackfin: perf: Convert hotplug notifier to " Thomas Gleixner
2013-01-31 12:11 ` [patch 15/40] x86: perf: Convert AMD IBS to hotplug " Thomas Gleixner
2013-01-31 12:11 ` [patch 17/40] powerpc: perf: Convert book3s notifier to state machine callbacks Thomas Gleixner
2013-01-31 12:11 ` [patch 18/40] s390: perf: Convert the hotplug " Thomas Gleixner
2013-01-31 12:11 ` [patch 19/40] sh: perf: Convert the hotplug notifiers " Thomas Gleixner
2013-01-31 12:11 ` [patch 21/40] sched: Convert the migration callback to hotplug states Thomas Gleixner
2013-01-31 12:11 ` [patch 20/40] perf: Remove perf cpu notifier code Thomas Gleixner
2013-01-31 12:11 ` [patch 22/40] workqueue: Convert to state machine callbacks Thomas Gleixner
2013-01-31 12:11 ` [patch 23/40] cpufreq: Convert to hotplug state machine Thomas Gleixner
2013-01-31 12:11 ` [patch 24/40] arm64: Convert generic timers " Thomas Gleixner
2013-01-31 12:11 ` [patch 25/40] arm: Convert VFP hotplug notifiers to " Thomas Gleixner
2013-01-31 12:11 ` [patch 26/40] arm: perf: Convert to hotplug " Thomas Gleixner
2013-01-31 12:11 ` [patch 27/40] virt: Convert kvm hotplug to " Thomas Gleixner
2013-01-31 12:11 ` [patch 28/40] cpuhotplug: Remove CPU_STARTING notifier Thomas Gleixner
2013-01-31 12:11 ` [patch 29/40] s390: Convert vtime to hotplug state machine Thomas Gleixner
2013-01-31 12:11 ` [patch 30/40] x86: tboot: Convert " Thomas Gleixner
2013-01-31 12:11 ` [patch 31/40] sched: Convert fair nohz balancer " Thomas Gleixner
2013-01-31 12:11 ` [patch 33/40] hrtimer: Convert " Thomas Gleixner
2013-01-31 12:11 ` [patch 32/40] rcu: Convert rcutree " Thomas Gleixner
2013-02-12  0:01   ` Paul E. McKenney
2013-02-12 15:50     ` Paul E. McKenney
2013-01-31 12:11 ` [patch 34/40] cpuhotplug: Remove CPU_DYING notifier Thomas Gleixner
2013-01-31 12:11 ` [patch 35/40] timers: Convert to hotplug state machine Thomas Gleixner
2013-01-31 12:11 ` [patch 36/40] profile: Convert ot " Thomas Gleixner
2013-01-31 12:11 ` [patch 37/40] x86: x2apic: Convert to cpu " Thomas Gleixner
2013-01-31 12:11 ` [patch 38/40] smp: Convert core to " Thomas Gleixner
2013-01-31 12:11 ` [patch 39/40] relayfs: Convert " Thomas Gleixner
2013-01-31 12:11 ` [patch 40/40] slab: " Thomas Gleixner
2013-01-31 20:23 ` [patch 00/40] CPU hotplug rework - episode I Andrew Morton
2013-01-31 21:48   ` Thomas Gleixner
2013-01-31 21:59     ` Linus Torvalds
2013-01-31 22:44       ` Thomas Gleixner
2013-01-31 22:55         ` Linus Torvalds
2013-02-01 10:51           ` Thomas Gleixner
2013-02-07  4:01             ` Rusty Russell
2013-02-09  0:28 ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130131120742.160047181@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=arjan@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=magnus.damm@gmail.com \
    --cc=mingo@kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rusty@rustcorp.com.au \
    --cc=rw@linutronix.de \
    --cc=srivatsa.bhat@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).