From: Thomas Gleixner Get rid of the prio ordering of the separate notifiers and use a proper state callback pair. Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Cc: Andrew Morton Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Nicolas Iooss Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Rusty Russell Signed-off-by: Anna-Maria Gleixner --- include/linux/cpu.h | 9 --- include/linux/cpuhotplug.h | 2 include/linux/workqueue.h | 6 ++ kernel/cpu.c | 10 ++++ kernel/workqueue.c | 108 +++++++++++++++++---------------------------- 5 files changed, 61 insertions(+), 74 deletions(-) --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -55,15 +55,6 @@ extern ssize_t arch_cpu_release(const ch #endif struct notifier_block; -/* - * CPU notifier priorities. - */ -enum { - /* bring up workqueues before normal notifiers and down after */ - CPU_PRI_WORKQUEUE_UP = 5, - CPU_PRI_WORKQUEUE_DOWN = -5, -}; - #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ #define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ #define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -12,6 +12,7 @@ enum cpuhp_state { CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, + CPUHP_WORKQUEUE_PREP, CPUHP_NOTIFY_PREPARE, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, @@ -49,6 +50,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -625,4 +625,10 @@ void wq_watchdog_touch(int cpu); static inline void wq_watchdog_touch(int cpu) { } #endif /* CONFIG_WQ_WATCHDOG */ +#ifdef CONFIG_SMP +int workqueue_prepare_cpu(unsigned int cpu); +int workqueue_online_cpu(unsigned int cpu); +int workqueue_offline_cpu(unsigned int cpu); +#endif + #endif --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1185,6 +1185,11 @@ static struct cpuhp_step cpuhp_bp_states .startup = perf_event_init_cpu, .teardown = perf_event_exit_cpu, }, + [CPUHP_WORKQUEUE_PREP] = { + .name = "workqueue prepare", + .startup = workqueue_prepare_cpu, + .teardown = NULL, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. @@ -1265,6 +1270,11 @@ static struct cpuhp_step cpuhp_ap_states .startup = perf_event_init_cpu, .teardown = perf_event_exit_cpu, }, + [CPUHP_AP_WORKQUEUE_ONLINE] = { + .name = "workqueue online", + .startup = workqueue_online_cpu, + .teardown = workqueue_offline_cpu, + }, /* * Online/down_prepare notifiers. Will be removed once the notifiers --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4611,84 +4611,65 @@ static void restore_unbound_workers_cpum pool->attrs->cpumask) < 0); } -/* - * Workqueues should be brought up before normal priority CPU notifiers. - * This will be registered high priority CPU notifier. - */ -static int workqueue_cpu_up_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +int workqueue_prepare_cpu(unsigned int cpu) +{ + struct worker_pool *pool; + + for_each_cpu_worker_pool(pool, cpu) { + if (pool->nr_workers) + continue; + if (!create_worker(pool)) + return -ENOMEM; + } + return 0; +} + +int workqueue_online_cpu(unsigned int cpu) { - int cpu = (unsigned long)hcpu; struct worker_pool *pool; struct workqueue_struct *wq; int pi; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - for_each_cpu_worker_pool(pool, cpu) { - if (pool->nr_workers) - continue; - if (!create_worker(pool)) - return NOTIFY_BAD; - } - break; - - case CPU_DOWN_FAILED: - case CPU_ONLINE: - mutex_lock(&wq_pool_mutex); + mutex_lock(&wq_pool_mutex); - for_each_pool(pool, pi) { - mutex_lock(&pool->attach_mutex); + for_each_pool(pool, pi) { + mutex_lock(&pool->attach_mutex); - if (pool->cpu == cpu) - rebind_workers(pool); - else if (pool->cpu < 0) - restore_unbound_workers_cpumask(pool, cpu); + if (pool->cpu == cpu) + rebind_workers(pool); + else if (pool->cpu < 0) + restore_unbound_workers_cpumask(pool, cpu); - mutex_unlock(&pool->attach_mutex); - } + mutex_unlock(&pool->attach_mutex); + } - /* update NUMA affinity of unbound workqueues */ - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, true); + /* update NUMA affinity of unbound workqueues */ + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, true); - mutex_unlock(&wq_pool_mutex); - break; - } - return NOTIFY_OK; + mutex_unlock(&wq_pool_mutex); + return 0; } -/* - * Workqueues should be brought down after normal priority CPU notifiers. - * This will be registered as low priority CPU notifier. - */ -static int workqueue_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +int workqueue_offline_cpu(unsigned int cpu) { - int cpu = (unsigned long)hcpu; struct work_struct unbind_work; struct workqueue_struct *wq; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); - - /* update NUMA affinity of unbound workqueues */ - mutex_lock(&wq_pool_mutex); - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, false); - mutex_unlock(&wq_pool_mutex); - - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); - break; - } - return NOTIFY_OK; + /* unbinding per-cpu workers should happen on the local CPU */ + INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); + queue_work_on(cpu, system_highpri_wq, &unbind_work); + + /* update NUMA affinity of unbound workqueues */ + mutex_lock(&wq_pool_mutex); + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, false); + mutex_unlock(&wq_pool_mutex); + + /* wait for per-cpu unbinding to finish */ + flush_work(&unbind_work); + destroy_work_on_stack(&unbind_work); + return 0; } #ifdef CONFIG_SMP @@ -5490,9 +5471,6 @@ static int __init init_workqueues(void) pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); - cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); - hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); - wq_numa_init(); /* initialize CPU pools */