All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: oleg@redhat.com, paulmck@linux.vnet.ibm.com
Cc: tj@kernel.org, mingo@redhat.com, linux-kernel@vger.kernel.org,
	der.herr@hofr.at, peterz@infradead.org, dave@stgolabs.net,
	riel@redhat.com, viro@ZenIV.linux.org.uk,
	torvalds@linux-foundation.org
Subject: [RFC][PATCH 09/13] hotplug: Replace hotplug lock with percpu-rwsem
Date: Mon, 22 Jun 2015 14:16:32 +0200	[thread overview]
Message-ID: <20150622122256.480062572@infradead.org> (raw)
In-Reply-To: 20150622121623.291363374@infradead.org

[-- Attachment #1: peterz-hotplug-rwsem.patch --]
[-- Type: text/plain, Size: 8424 bytes --]

The cpu hotplug lock is a rwsem with read-in-write and read-in-read
recursion. Implement it as such.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/cpu.h          |    6 +
 include/linux/percpu-rwsem.h |   10 ++-
 include/linux/sched.h        |    4 +
 init/main.c                  |    1 
 kernel/cpu.c                 |  133 +++++++++++++------------------------------
 kernel/fork.c                |    2 
 lib/Kconfig                  |    5 +
 7 files changed, 66 insertions(+), 95 deletions(-)

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -224,6 +224,9 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init(void);
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +245,9 @@ int cpu_down(unsigned int cpu);
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init(void) {}
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -20,12 +20,10 @@ extern void __percpu_down_read(struct pe
 extern bool __percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void _percpu_down_read(struct percpu_rw_semaphore *sem)
 {
 	might_sleep();
 
-	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
-
 	preempt_disable();
 	/*
 	 * We are in an RCU-sched read-side critical section, so the writer
@@ -46,6 +44,12 @@ static inline void percpu_down_read(stru
 	 */
 }
 
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+	_percpu_down_read(sem);
+}
+
 static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
 	bool ret = true;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,10 @@ struct task_struct {
 	unsigned int btrace_seq;
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuhp_ref;
+#endif
+
 	unsigned int policy;
 	int nr_cpus_allowed;
 	cpumask_t cpus_allowed;
--- a/init/main.c
+++ b/init/main.c
@@ -588,6 +588,7 @@ asmlinkage __visible void __init start_k
 	sched_clock_postinit();
 	perf_event_init();
 	profile_init();
+	cpu_hotplug_init();
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
 	early_boot_irqs_disabled = false;
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -22,6 +22,7 @@
 #include <linux/lockdep.h>
 #include <linux/tick.h>
 #include <trace/events/power.h>
+#include <linux/percpu-rwsem.h>
 
 #include "smpboot.h"
 
@@ -50,7 +51,8 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
@@ -58,126 +60,72 @@ static int cpu_hotplug_disabled;
 #ifdef CONFIG_HOTPLUG_CPU
 
 static struct {
-	struct task_struct *active_writer;
-	/* wait queue to wake up the active_writer */
-	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "cpu_hotplug.lock" },
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
+	struct percpu_rw_semaphore	rwsem;
+	struct task_struct		*writer;
+} cpu_hotplug = { .writer = &init_task, };
+
+void cpu_hotplug_init(void)
+{
+	percpu_init_rwsem(&cpu_hotplug.rwsem);
+	cpu_hotplug.writer = NULL;
+}
 
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+	p->cpuhp_ref = 0;
+}
 
 void get_online_cpus(void)
 {
 	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+
+	/* read in write recursion */
+	if (cpu_hotplug.writer == current)
+		return;
+
+	/* read in read recursion */
+	if (current->cpuhp_ref++)
 		return;
-	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	lock_map_acquire_read(&cpu_hotplug.rwsem.rw_sem.dep_map);
+	_percpu_down_read(&cpu_hotplug.rwsem);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 bool try_get_online_cpus(void)
 {
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug.writer == current)
 		return true;
-	if (!mutex_trylock(&cpu_hotplug.lock))
-		return false;
-	cpuhp_lock_acquire_tryread();
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
-	return true;
+
+	if (current->cpuhp_ref++)
+		return true;
+
+	return percpu_down_read_trylock(&cpu_hotplug.rwsem);
 }
 EXPORT_SYMBOL_GPL(try_get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug.writer == current)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
-
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
-
-	cpuhp_lock_release();
+	if (--current->cpuhp_ref)
+		return;
 
+	percpu_up_read(&cpu_hotplug.rwsem);
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpu_hotplug_begin(void)
 {
-	DEFINE_WAIT(wait);
-
-	cpu_hotplug.active_writer = current;
-	cpuhp_lock_acquire();
-
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+	percpu_down_write(&cpu_hotplug.rwsem);
+	cpu_hotplug.writer = current;
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
-	cpuhp_lock_release();
+	cpu_hotplug.writer = NULL;
+	percpu_up_write(&cpu_hotplug.rwsem);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1410,6 +1410,8 @@ static struct task_struct *copy_process(
 	p->sequential_io_avg	= 0;
 #endif
 
+	cpu_hotplug_init_task(p);
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -56,6 +56,11 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	bool
 
+config PERCPU_RWSEM_HOTPLUG
+	def_bool y
+	depends on HOTPLUG_CPU
+	select PERCPU_RWSEM
+
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

  parent reply	other threads:[~2015-06-22 12:26 UTC|newest]

Thread overview: 106+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-22 12:16 [RFC][PATCH 00/13] percpu rwsem -v2 Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 01/13] rcu: Create rcu_sync infrastructure Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 02/13] rcusync: Introduce struct rcu_sync_ops Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 03/13] rcusync: Add the CONFIG_PROVE_RCU checks Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 04/13] rcusync: Introduce rcu_sync_dtor() Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 05/13] percpu-rwsem: Optimize readers and reduce global impact Peter Zijlstra
2015-06-22 23:02   ` Oleg Nesterov
2015-06-23  7:28   ` Nicholas Mc Guire
2015-06-25 19:08     ` Peter Zijlstra
2015-06-25 19:17       ` Tejun Heo
2015-06-29  9:32         ` Peter Zijlstra
2015-06-29 15:12           ` Tejun Heo
2015-06-29 15:14             ` Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 06/13] percpu-rwsem: Provide percpu_down_read_trylock() Peter Zijlstra
2015-06-22 23:08   ` Oleg Nesterov
2015-06-22 12:16 ` [RFC][PATCH 07/13] sched: Reorder task_struct Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 08/13] percpu-rwsem: DEFINE_STATIC_PERCPU_RWSEM Peter Zijlstra
2015-06-22 12:16 ` Peter Zijlstra [this message]
2015-06-22 22:57   ` [RFC][PATCH 09/13] hotplug: Replace hotplug lock with percpu-rwsem Oleg Nesterov
2015-06-23  7:16     ` Peter Zijlstra
2015-06-23 17:01       ` Oleg Nesterov
2015-06-23 17:53         ` Peter Zijlstra
2015-06-24 13:50           ` Oleg Nesterov
2015-06-24 14:13             ` Peter Zijlstra
2015-06-24 15:12               ` Oleg Nesterov
2015-06-24 16:15                 ` Peter Zijlstra
2015-06-28 23:56             ` [PATCH 0/3] percpu-rwsem: introduce percpu_rw_semaphore->recursive mode Oleg Nesterov
2015-06-28 23:56               ` [PATCH 1/3] rcusync: introduce rcu_sync_struct->exclusive mode Oleg Nesterov
2015-06-28 23:56               ` [PATCH 2/3] percpu-rwsem: don't use percpu_rw_semaphore->rw_sem to exclude writers Oleg Nesterov
2015-06-28 23:56               ` [PATCH 3/3] percpu-rwsem: introduce percpu_rw_semaphore->recursive mode Oleg Nesterov
2015-06-22 12:16 ` [RFC][PATCH 10/13] fs/locks: Replace lg_global with a percpu-rwsem Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 11/13] fs/locks: Replace lg_local with a per-cpu spinlock Peter Zijlstra
2015-06-23  0:19   ` Oleg Nesterov
2015-06-22 12:16 ` [RFC][PATCH 12/13] stop_machine: Remove lglock Peter Zijlstra
2015-06-22 22:21   ` Oleg Nesterov
2015-06-23 10:09     ` Peter Zijlstra
2015-06-23 10:55       ` Peter Zijlstra
2015-06-23 11:20         ` Peter Zijlstra
2015-06-23 13:08           ` Peter Zijlstra
2015-06-23 16:36             ` Oleg Nesterov
2015-06-23 17:30             ` Paul E. McKenney
2015-06-23 18:04               ` Peter Zijlstra
2015-06-23 18:26                 ` Paul E. McKenney
2015-06-23 19:05                   ` Paul E. McKenney
2015-06-24  2:23                     ` Paul E. McKenney
2015-06-24  8:32                       ` Peter Zijlstra
2015-06-24  9:31                         ` Peter Zijlstra
2015-06-24 13:48                           ` Paul E. McKenney
2015-06-24 15:01                         ` Paul E. McKenney
2015-06-24 15:34                           ` Peter Zijlstra
2015-06-24  7:35                   ` Peter Zijlstra
2015-06-24  8:42                     ` Ingo Molnar
2015-06-24 13:39                       ` Paul E. McKenney
2015-06-24 13:43                         ` Ingo Molnar
2015-06-24 14:03                           ` Paul E. McKenney
2015-06-24 14:50                     ` Paul E. McKenney
2015-06-24 15:01                       ` Peter Zijlstra
2015-06-24 15:27                         ` Paul E. McKenney
2015-06-24 15:40                           ` Peter Zijlstra
2015-06-24 16:09                             ` Paul E. McKenney
2015-06-24 16:42                               ` Peter Zijlstra
2015-06-24 17:10                                 ` Paul E. McKenney
2015-06-24 17:20                                   ` Paul E. McKenney
2015-06-24 17:29                                     ` Peter Zijlstra
2015-06-24 17:28                                   ` Peter Zijlstra
2015-06-24 17:32                                     ` Peter Zijlstra
2015-06-24 18:14                                     ` Peter Zijlstra
2015-06-24 17:58                                   ` Peter Zijlstra
2015-06-25  3:23                                     ` Paul E. McKenney
2015-06-25 11:07                                       ` Peter Zijlstra
2015-06-25 13:47                                         ` Paul E. McKenney
2015-06-25 14:20                                           ` Peter Zijlstra
2015-06-25 14:51                                             ` Paul E. McKenney
2015-06-26 12:32                                               ` Peter Zijlstra
2015-06-26 16:14                                                 ` Paul E. McKenney
2015-06-29  7:56                                                   ` Peter Zijlstra
2015-06-30 21:32                                                     ` Paul E. McKenney
2015-07-01 11:56                                                       ` Peter Zijlstra
2015-07-01 15:56                                                         ` Paul E. McKenney
2015-07-01 16:16                                                           ` Peter Zijlstra
2015-07-01 18:45                                                             ` Paul E. McKenney
2015-06-23 14:39         ` Paul E. McKenney
2015-06-23 16:20       ` Oleg Nesterov
2015-06-23 17:24         ` Oleg Nesterov
2015-06-25 19:18           ` Peter Zijlstra
2015-06-22 12:16 ` [RFC][PATCH 13/13] locking: " Peter Zijlstra
2015-06-22 12:36 ` [RFC][PATCH 00/13] percpu rwsem -v2 Peter Zijlstra
2015-06-22 18:11 ` Daniel Wagner
2015-06-22 19:05   ` Peter Zijlstra
2015-06-23  9:35     ` Daniel Wagner
2015-06-23 10:00       ` Ingo Molnar
2015-06-23 14:34       ` Peter Zijlstra
2015-06-23 14:56         ` Daniel Wagner
2015-06-23 17:50           ` Peter Zijlstra
2015-06-23 19:36             ` Peter Zijlstra
2015-06-24  8:46               ` Ingo Molnar
2015-06-24  9:01                 ` Peter Zijlstra
2015-06-24  9:18                 ` Daniel Wagner
2015-07-01  5:57                   ` Daniel Wagner
2015-07-01 21:54                     ` Linus Torvalds
2015-07-02  9:41                       ` Peter Zijlstra
2015-07-20  5:53                         ` Daniel Wagner
2015-07-20 18:44                           ` Linus Torvalds
2015-06-22 20:06 ` Linus Torvalds
2015-06-23 16:10 ` Davidlohr Bueso
2015-06-23 16:21   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150622122256.480062572@infradead.org \
    --to=peterz@infradead.org \
    --cc=dave@stgolabs.net \
    --cc=der.herr@hofr.at \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.