All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-15 12:36 ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-15 12:36 UTC (permalink / raw)
  To: Intel graphics driver community testing & development
  Cc: Joonas Lahtinen, Linux kernel development, Ingo Molnar,
	Peter Zijlstra, David Hildenbrand, Paul E. McKenney,
	Gautham R. Shenoy, Chris Wilson

Instead of implementing a custom locked reference counting, use lockref.

Current implementation leads to a deadlock splat on Intel SKL platforms
when lockdep debugging is enabled.

This is due to a few CPUfreq drivers (including Intel P-state) doing the
following: policy->rwsem is locked during driver initialization, and the
functions called during init that actually apply CPU limits use
get_online_cpus (because they have other calling paths too), which will
briefly lock cpu_hotplug.lock to increase cpu_hotplug.refcount.

On a later calling path, during suspend, cpu_hotplug_begin() is called in
disable_nonboot_cpus(); callbacks to CPUfreq functions get called after it,
which will lock policy->rwsem while cpu_hotplug.lock is already held by
cpu_hotplug_begin(), so we do have a potential deadlock scenario reported by
our CI system (though it is a very unlikely one). See the Bugzilla link for
more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294
Cc: Linux kernel development <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 kernel/cpu.c | 87 +++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d396..8acec83 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <linux/lockref.h>
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
@@ -62,13 +63,10 @@ static struct {
 	struct task_struct *active_writer;
 	/* wait queue to wake up the active_writer */
 	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
+	/* wait queue to wake up readers */
+	wait_queue_head_t reader_wq;
+	/* track online CPU users */
+	struct lockref lockref;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -76,7 +74,7 @@ static struct {
 } cpu_hotplug = {
 	.active_writer = NULL,
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+	.reader_wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.reader_wq),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	.dep_map = {.name = "cpu_hotplug.lock" },
 #endif
@@ -92,52 +90,64 @@ static struct {
 
 void get_online_cpus(void)
 {
-	might_sleep();
+	DEFINE_WAIT(wait);
+
 	if (cpu_hotplug.active_writer == current)
 		return;
+
 	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	/* First to get might have to wait over a hotplug operation. */
+	while (!lockref_get_or_lock(&cpu_hotplug.lockref)) {
+		if (cpu_hotplug.lockref.count == 0) {
+			cpu_hotplug.lockref.count++;
+			spin_unlock(&cpu_hotplug.lockref.lock);
+			break;
+		}
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
+		prepare_to_wait(&cpu_hotplug.reader_wq, &wait, TASK_UNINTERRUPTIBLE);
+		schedule();
+		finish_wait(&cpu_hotplug.reader_wq, &wait);
+	}
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
 	if (cpu_hotplug.active_writer == current)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
+	/* Last to release might have to wake queued hotplug operation. */
+	if (!lockref_put_or_lock(&cpu_hotplug.lockref)) {
+		WARN_ON(cpu_hotplug.lockref.count <= 0);
+		cpu_hotplug.lockref.count = 0;
+		spin_unlock(&cpu_hotplug.lockref.lock);
 
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
+		if (waitqueue_active(&cpu_hotplug.wq))
+			wake_up(&cpu_hotplug.wq);
+	}
 
 	cpuhp_lock_release();
-
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
  * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
+ * cpu_hotplug.lockref goes to zero.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
+ * will be blocked by the cpu_hotplug.lockref being dead.
  *
  * Since cpu_hotplug_begin() is always called after invoking
  * cpu_maps_update_begin(), we can be sure that only one writer is active.
  *
  * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
+ * - Lockref goes to zero, last reader wakes up the sleeping
  *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
+ * - A new reader arrives at this moment, bumps up the lockref.
+ * - The woken up writer finds the lockref non-zero and goes
+ *   to sleep again.
  *
  * However, this is very difficult to achieve in practice since
  * get_online_cpus() not an api which is called all that often.
@@ -151,20 +161,31 @@ void cpu_hotplug_begin(void)
 	cpuhp_lock_acquire();
 
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
+		spin_lock(&cpu_hotplug.lockref.lock);
+		if (cpu_hotplug.lockref.count <= 0)
+			break;
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
 		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
+		finish_wait(&cpu_hotplug.wq, &wait);
 	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+
+	WARN_ON(__lockref_is_dead(&cpu_hotplug.lockref));
+	lockref_mark_dead(&cpu_hotplug.lockref);
+	spin_unlock(&cpu_hotplug.lockref.lock);
 }
 
 void cpu_hotplug_done(void)
 {
+	WARN_ON(!__lockref_is_dead(&cpu_hotplug.lockref));
+	cpu_hotplug.lockref.count = 0;
+	spin_unlock(&cpu_hotplug.lockref.lock);
+
+	if (waitqueue_active(&cpu_hotplug.reader_wq))
+		wake_up(&cpu_hotplug.reader_wq);
+
 	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
 	cpuhp_lock_release();
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-15 12:36 ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-15 12:36 UTC (permalink / raw)
  To: Intel graphics driver community testing & development
  Cc: Gautham R. Shenoy, Peter Zijlstra, Linux kernel development,
	David Hildenbrand, Paul E. McKenney, Ingo Molnar

Instead of implementing a custom locked reference counting, use lockref.

Current implementation leads to a deadlock splat on Intel SKL platforms
when lockdep debugging is enabled.

This is due to a few CPUfreq drivers (including Intel P-state) doing the
following: policy->rwsem is locked during driver initialization, and the
functions called during init that actually apply CPU limits use
get_online_cpus (because they have other calling paths too), which will
briefly lock cpu_hotplug.lock to increase cpu_hotplug.refcount.

On a later calling path, during suspend, cpu_hotplug_begin() is called in
disable_nonboot_cpus(); callbacks to CPUfreq functions get called after it,
which will lock policy->rwsem while cpu_hotplug.lock is already held by
cpu_hotplug_begin(), so we do have a potential deadlock scenario reported by
our CI system (though it is a very unlikely one). See the Bugzilla link for
more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294
Cc: Linux kernel development <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 kernel/cpu.c | 87 +++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d396..8acec83 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <linux/lockref.h>
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
@@ -62,13 +63,10 @@ static struct {
 	struct task_struct *active_writer;
 	/* wait queue to wake up the active_writer */
 	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
+	/* wait queue to wake up readers */
+	wait_queue_head_t reader_wq;
+	/* track online CPU users */
+	struct lockref lockref;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -76,7 +74,7 @@ static struct {
 } cpu_hotplug = {
 	.active_writer = NULL,
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+	.reader_wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.reader_wq),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	.dep_map = {.name = "cpu_hotplug.lock" },
 #endif
@@ -92,52 +90,64 @@ static struct {
 
 void get_online_cpus(void)
 {
-	might_sleep();
+	DEFINE_WAIT(wait);
+
 	if (cpu_hotplug.active_writer == current)
 		return;
+
 	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	/* First to get might have to wait over a hotplug operation. */
+	while (!lockref_get_or_lock(&cpu_hotplug.lockref)) {
+		if (cpu_hotplug.lockref.count == 0) {
+			cpu_hotplug.lockref.count++;
+			spin_unlock(&cpu_hotplug.lockref.lock);
+			break;
+		}
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
+		prepare_to_wait(&cpu_hotplug.reader_wq, &wait, TASK_UNINTERRUPTIBLE);
+		schedule();
+		finish_wait(&cpu_hotplug.reader_wq, &wait);
+	}
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
 	if (cpu_hotplug.active_writer == current)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
+	/* Last to release might have to wake queued hotplug operation. */
+	if (!lockref_put_or_lock(&cpu_hotplug.lockref)) {
+		WARN_ON(cpu_hotplug.lockref.count <= 0);
+		cpu_hotplug.lockref.count = 0;
+		spin_unlock(&cpu_hotplug.lockref.lock);
 
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
+		if (waitqueue_active(&cpu_hotplug.wq))
+			wake_up(&cpu_hotplug.wq);
+	}
 
 	cpuhp_lock_release();
-
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
  * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
+ * cpu_hotplug.lockref goes to zero.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
+ * will be blocked by the cpu_hotplug.lockref being dead.
  *
  * Since cpu_hotplug_begin() is always called after invoking
  * cpu_maps_update_begin(), we can be sure that only one writer is active.
  *
  * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
+ * - Lockref goes to zero, last reader wakes up the sleeping
  *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
+ * - A new reader arrives at this moment, bumps up the lockref.
+ * - The woken up writer finds the lockref non-zero and goes
+ *   to sleep again.
  *
  * However, this is very difficult to achieve in practice since
  * get_online_cpus() not an api which is called all that often.
@@ -151,20 +161,31 @@ void cpu_hotplug_begin(void)
 	cpuhp_lock_acquire();
 
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
+		spin_lock(&cpu_hotplug.lockref.lock);
+		if (cpu_hotplug.lockref.count <= 0)
+			break;
+		spin_unlock(&cpu_hotplug.lockref.lock);
+
 		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
+		finish_wait(&cpu_hotplug.wq, &wait);
 	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+
+	WARN_ON(__lockref_is_dead(&cpu_hotplug.lockref));
+	lockref_mark_dead(&cpu_hotplug.lockref);
+	spin_unlock(&cpu_hotplug.lockref.lock);
 }
 
 void cpu_hotplug_done(void)
 {
+	WARN_ON(!__lockref_is_dead(&cpu_hotplug.lockref));
+	cpu_hotplug.lockref.count = 0;
+	spin_unlock(&cpu_hotplug.lockref.lock);
+
+	if (waitqueue_active(&cpu_hotplug.reader_wq))
+		wake_up(&cpu_hotplug.reader_wq);
+
 	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
 	cpuhp_lock_release();
 }
 
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-15 12:36 ` Joonas Lahtinen
@ 2016-02-15 14:17   ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-15 14:17 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> Instead of implementing a custom locked reference counting, use lockref.
> 
> Current implementation leads to a deadlock splat on Intel SKL platforms
> when lockdep debugging is enabled.
> 
> This is due to few of CPUfreq drivers (including Intel P-state) having this;
> policy->rwsem is locked during driver initialization and the functions called
> during init that actually apply CPU limits use get_online_cpus (because they
> have other calling paths too), which will briefly lock cpu_hotplug.lock to
> increase cpu_hotplug.refcount.
> 
> On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> which will lock policy->rwsem and cpu_hotplug.lock is already held by
> cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> our CI system (though it is a very unlikely one). See the Bugzilla link for more
> details.

I've been meaning to change the thing into a percpu-rwsem; I just
haven't had time to look into the lockdep splat that it generated.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-15 14:17   ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-15 14:17 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> Instead of implementing a custom locked reference counting, use lockref.
> 
> Current implementation leads to a deadlock splat on Intel SKL platforms
> when lockdep debugging is enabled.
> 
> This is due to few of CPUfreq drivers (including Intel P-state) having this;
> policy->rwsem is locked during driver initialization and the functions called
> during init that actually apply CPU limits use get_online_cpus (because they
> have other calling paths too), which will briefly lock cpu_hotplug.lock to
> increase cpu_hotplug.refcount.
> 
> On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> which will lock policy->rwsem and cpu_hotplug.lock is already held by
> cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> our CI system (though it is a very unlikely one). See the Bugzilla link for more
> details.

I've been meaning to change the thing into a percpu-rwsem; I just
haven't had time to look into the lockdep splat that it generated.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-15 14:17   ` Peter Zijlstra
@ 2016-02-15 17:06     ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-15 17:06 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.


The below has plenty of lockdep issues because percpu-rwsem is
reader-writer fair (like the regular rwsem), so it does throw up a fair
number of very icky issues.

If at all possible, I'd really rather fix those and have a 'saner'
hotplug lock, rather than muddle on with the open-coded horror lock we
have now.


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
 	wait_queue_head_t	write_waitq;
 };
 
+#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);	\
+static struct percpu_rw_semaphore name = {				\
+	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
+	.fast_read_ctr = &__percpu_rwsem_frc_##name,			\
+	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
+	.write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq),	\
+}
+
 extern void percpu_down_read(struct percpu_rw_semaphore *);
 extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void percpu_up_read(struct percpu_rw_semaphore *);
@@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
 	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
+#define percpu_rwsem_assert_held(sem)                          \
+	lockdep_assert_held(&(sem)->rw_sem)
+
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,6 +1403,9 @@ struct task_struct {
 	struct task_struct *last_wakee;
 
 	int wake_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuhp_ref;
+#endif
 #endif
 	int on_rq;
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,7 @@
 #include <linux/tick.h>
 #include <linux/irq.h>
 #include <trace/events/power.h>
+#include <linux/percpu-rwsem.h>
 
 #include "smpboot.h"
 
@@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	/* wait queue to wake up the active_writer */
-	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "cpu_hotplug.lock" },
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
+DEFINE_STATIC_PERCPU_RWSEM(hotplug);
 
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+	if (WARN_ON_ONCE(p->cpuhp_ref))
+		p->cpuhp_ref = 0;
+}
 
 void get_online_cpus(void)
 {
 	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+
+	if (current->cpuhp_ref++) /* read recursion */
 		return;
-	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	percpu_down_read(&hotplug);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
-	if (cpu_hotplug.active_writer == current)
+	if (--current->cpuhp_ref)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
-
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
-
-	cpuhp_lock_release();
-
+	percpu_up_read(&hotplug);
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpu_hotplug_begin(void)
 {
-	DEFINE_WAIT(wait);
-
-	cpu_hotplug.active_writer = current;
-	cpuhp_lock_acquire();
-
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+	percpu_down_write(&hotplug);
+	current->cpuhp_ref++; /* allow read-in-write recursion */
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
-	cpuhp_lock_release();
+	current->cpuhp_ref--;
+	percpu_up_write(&hotplug);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1414,6 +1414,8 @@ static struct task_struct *copy_process(
 	p->sequential_io_avg	= 0;
 #endif
 
+	cpu_hotplug_init_task(p);
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -53,6 +53,11 @@ config GENERIC_IO
 config STMP_DEVICE
 	bool
 
+config PERCPU_RWSEM_HOTPLUG
+	def_bool y
+	depends on HOTPLUG_CPU
+	select PERCPU_RWSEM
+
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-15 17:06     ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-15 17:06 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.


The below has plenty of lockdep issues because percpu-rwsem is
reader-writer fair (like the regular rwsem), so it does throw up a fair
number of very icky issues.

If at all possible, I'd really rather fix those and have a 'saner'
hotplug lock, rather than muddle on with the open-coded horror lock we
have now.


--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
 extern void get_online_cpus(void);
@@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
 	wait_queue_head_t	write_waitq;
 };
 
+#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);	\
+static struct percpu_rw_semaphore name = {				\
+	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
+	.fast_read_ctr = &__percpu_rwsem_frc_##name,			\
+	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
+	.write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq),	\
+}
+
 extern void percpu_down_read(struct percpu_rw_semaphore *);
 extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void percpu_up_read(struct percpu_rw_semaphore *);
@@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
 	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
+#define percpu_rwsem_assert_held(sem)                          \
+	lockdep_assert_held(&(sem)->rw_sem)
+
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,6 +1403,9 @@ struct task_struct {
 	struct task_struct *last_wakee;
 
 	int wake_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuhp_ref;
+#endif
 #endif
 	int on_rq;
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,7 @@
 #include <linux/tick.h>
 #include <linux/irq.h>
 #include <trace/events/power.h>
+#include <linux/percpu-rwsem.h>
 
 #include "smpboot.h"
 
@@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
 
 static RAW_NOTIFIER_HEAD(cpu_chain);
 
-/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+/*
+ * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
  */
 static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	/* wait queue to wake up the active_writer */
-	wait_queue_head_t wq;
-	/* verifies that no writer will get active while readers are active */
-	struct mutex lock;
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	atomic_t refcount;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "cpu_hotplug.lock" },
-#endif
-};
-
-/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
-#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire_tryread() \
-				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
-#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
-#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
+DEFINE_STATIC_PERCPU_RWSEM(hotplug);
 
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+	if (WARN_ON_ONCE(p->cpuhp_ref))
+		p->cpuhp_ref = 0;
+}
 
 void get_online_cpus(void)
 {
 	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+
+	if (current->cpuhp_ref++) /* read recursion */
 		return;
-	cpuhp_lock_acquire_read();
-	mutex_lock(&cpu_hotplug.lock);
-	atomic_inc(&cpu_hotplug.refcount);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	percpu_down_read(&hotplug);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
 void put_online_cpus(void)
 {
-	int refcount;
-
-	if (cpu_hotplug.active_writer == current)
+	if (--current->cpuhp_ref)
 		return;
 
-	refcount = atomic_dec_return(&cpu_hotplug.refcount);
-	if (WARN_ON(refcount < 0)) /* try to fix things up */
-		atomic_inc(&cpu_hotplug.refcount);
-
-	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
-		wake_up(&cpu_hotplug.wq);
-
-	cpuhp_lock_release();
-
+	percpu_up_read(&hotplug);
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-/*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
- */
 void cpu_hotplug_begin(void)
 {
-	DEFINE_WAIT(wait);
-
-	cpu_hotplug.active_writer = current;
-	cpuhp_lock_acquire();
-
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
-		if (likely(!atomic_read(&cpu_hotplug.refcount)))
-				break;
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
-	finish_wait(&cpu_hotplug.wq, &wait);
+	percpu_down_write(&hotplug);
+	current->cpuhp_ref++; /* allow read-in-write recursion */
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
-	cpuhp_lock_release();
+	current->cpuhp_ref--;
+	percpu_up_write(&hotplug);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1414,6 +1414,8 @@ static struct task_struct *copy_process(
 	p->sequential_io_avg	= 0;
 #endif
 
+	cpu_hotplug_init_task(p);
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -53,6 +53,11 @@ config GENERIC_IO
 config STMP_DEVICE
 	bool
 
+config PERCPU_RWSEM_HOTPLUG
+	def_bool y
+	depends on HOTPLUG_CPU
+	select PERCPU_RWSEM
+
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-15 14:17   ` Peter Zijlstra
@ 2016-02-15 17:18     ` Daniel Vetter
  -1 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-15 17:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Joonas Lahtinen, Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.

I've thrown Joonas patch into a local topic branch to shut up the noise in
our CI, and it seems to be effective at that (2 runs thus far). I'll drop
this again once we have a proper solution (whatever it'll be) upstream.

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-15 17:18     ` Daniel Vetter
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-15 17:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > Instead of implementing a custom locked reference counting, use lockref.
> > 
> > Current implementation leads to a deadlock splat on Intel SKL platforms
> > when lockdep debugging is enabled.
> > 
> > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > policy->rwsem is locked during driver initialization and the functions called
> > during init that actually apply CPU limits use get_online_cpus (because they
> > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > increase cpu_hotplug.refcount.
> > 
> > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > details.
> 
> I've been meaning to change the thing into a percpu-rwsem, I just
> haven't had time to look into the lockdep splat that generated.

I've thrown Joonas patch into a local topic branch to shut up the noise in
our CI, and it seems to be effective at that (2 runs thus far). I'll drop
this again once we have a proper solution (whatever it'll be) upstream.

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-15 17:06     ` Peter Zijlstra
@ 2016-02-16  8:49       ` Joonas Lahtinen
  -1 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-16  8:49 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > > policy->rwsem is locked during driver initialization and the functions called
> > > during init that actually apply CPU limits use get_online_cpus (because they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 

I originally thought of implementing this more similar to what you
specify, but then I came across a discussion in the mailing list where
it was NAKed adding more members to task_struct;

http://comments.gmane.org/gmane.linux.kernel/970273

Adding proper recursion (the way my initial implementation was going)
got ugly without modifying task_struct because get_online_cpus() is a
speed critical code path.

So I'm all for fixing the current code in a different way if that will
then be merged.

Regards, Joonas

> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 
> 

<SNIP>

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-16  8:49       ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-16  8:49 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > > policy->rwsem is locked during driver initialization and the functions called
> > > during init that actually apply CPU limits use get_online_cpus (because they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 

I originally thought of implementing this more similar to what you
specify, but then I came across a discussion in the mailing list where
it was NAKed adding more members to task_struct;

http://comments.gmane.org/gmane.linux.kernel/970273

Adding proper recursion (the way my initial implementation was going)
got ugly without modifying task_struct because get_online_cpus() is a
speed critical code path.

So I'm all for fixing the current code in a different way if that will
then be merged.

Regards, Joonas

> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 
> 

<SNIP>

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-16  8:49       ` Joonas Lahtinen
@ 2016-02-16  9:14         ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-16  9:14 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> I originally thought of implementing this more similar to what you
> specify, but then I came across a discussion in the mailing list where
> it was NAKed adding more members to task_struct;
> 
> http://comments.gmane.org/gmane.linux.kernel/970273
> 
> Adding proper recursion (the way my initial implementation was going)
> got ugly without modifying task_struct because get_online_cpus() is a
> speed critical code path.

Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
considered performance critical.

> So I'm all for fixing the current code in a different way if that will
> then be merged.

So I'm not sure why you're poking at this horror show to begin with.
ISTR you mentioning a lockdep splat for SKL, but failed to provide
detail.

Making the hotplug lock _more_ special to fix that is just wrong. Fix
the retarded locking that led to it.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-16  9:14         ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-16  9:14 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> I originally thought of implementing this more similar to what you
> specify, but then I came across a discussion in the mailing list where
> it was NAKed adding more members to task_struct;
> 
> http://comments.gmane.org/gmane.linux.kernel/970273
> 
> Adding proper recursion (the way my initial implementation was going)
> got ugly without modifying task_struct because get_online_cpus() is a
> speed critical code path.

Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
considered performance critical.

> So I'm all for fixing the current code in a different way if that will
> then be merged.

So I'm not sure why you're poking at this horror show to begin with.
ISTR you mentioning a lockdep splat for SKL, but failed to provide
detail.

Making the hotplug lock _more_ special to fix that is just wrong. Fix
the retarded locking that led to it.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-16  9:14         ` Peter Zijlstra
@ 2016-02-16 10:51           ` Joonas Lahtinen
  -1 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-16 10:51 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On ti, 2016-02-16 at 10:14 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> > I originally thought of implementing this more similar to what you
> > specify, but then I came across a discussion in the mailing list where
> > it was NAKed adding more members to task_struct;
> > 
> > http://comments.gmane.org/gmane.linux.kernel/970273
> > 
> > Adding proper recursion (the way my initial implementation was going)
> > got ugly without modifying task_struct because get_online_cpus() is a
> > speed critical code path.
> 
> Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
> considered performance critical.

Oh well, at least changes to it added quite noticeably to the bootup
time of a system.

> 
> > So I'm all for fixing the current code in a different way if that will
> > then be merged.
> 
> So I'm not sure why you're poking at this horror show to begin with.
> ISTR you mentioning a lockdep splat for SKL, but failed to provide
> detail.
> 

Quoting my original patch;

"See the Bugzilla link for more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294"

The improvement my patch implements is to use lockref for locked
reference counting (hotplug code previously rolled its own mutex +
atomic combo), which gets rid of the deadlock scenario described and
linked in the initial patch. Trace for the scenario;

https://bugs.freedesktop.org/attachment.cgi?id=121490

I think using lockref makes it substantially less special, lockref code
being a lot more battle-tested in the FS code than the previous
cpu_hotplug.lock mess.

> Making the hotplug lock _more_ special to fix that is just wrong. Fix
> the retarded locking that lead to it.
> 

I do agree that it's still not pretty, but now it does correctly what
the previous code was trying to do with custom mutex + atomic.

I'm all for fixing the code further, but prior to proceeding there
needs to be some sort of an agreement on either making
get_online_cpus() slower (which does not seem like a good idea) or
adding more members to task_struct.

Regards, Joonas

> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-16 10:51           ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-16 10:51 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On ti, 2016-02-16 at 10:14 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 10:49:36AM +0200, Joonas Lahtinen wrote:
> > I originally thought of implementing this more similar to what you
> > specify, but then I came across a discussion in the mailing list where
> > it was NAKed adding more members to task_struct;
> > 
> > http://comments.gmane.org/gmane.linux.kernel/970273
> > 
> > Adding proper recursion (the way my initial implementation was going)
> > got ugly without modifying task_struct because get_online_cpus() is a
> > speed critical code path.
> 
> Yeah, just don't let Linus hear you say that. get_online_cpus() is _not_
> considered performance critical.

Oh well, at least changes to it added quite noticeably to the bootup
time of a system.

> 
> > So I'm all for fixing the current code in a different way if that will
> > then be merged.
> 
> So I'm not sure why you're poking at this horror show to begin with.
> ISTR you mentioning a lockdep splat for SKL, but failed to provide
> detail.
> 

Quoting my original patch;

"See the Bugzilla link for more details.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93294"

The improvement my patch implements is to use lockref for locked
reference counting (hotplug code previously rolled its own mutex +
atomic combo), which gets rid of the deadlock scenario described and
linked in the initial patch. Trace for the scenario;

https://bugs.freedesktop.org/attachment.cgi?id=121490

I think using lockref makes it substantially less special, lockref code
being a lot more battle-tested in the FS code than the previous
cpu_hotplug.lock mess.

> Making the hotplug lock _more_ special to fix that is just wrong. Fix
> the retarded locking that lead to it.
> 

I do agree that it's still not pretty, but now it does correctly what
the previous code was trying to do with custom mutex + atomic.

I'm all for fixing the code further, but prior to proceeding there
needs to be some sort of an agreement on either making
get_online_cpus() slower (which does not seem like a good idea) or
adding more members to task_struct.

Regards, Joonas

> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-16 10:51           ` Joonas Lahtinen
@ 2016-02-16 11:07             ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-16 11:07 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> Quoting my original patch;
> 
> "See the Bugzilla link for more details.

If it's not in the Changelog it doesn't exist. Patches should be self
contained and not refer to external sources for critical information.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-16 11:07             ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-16 11:07 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> Quoting my original patch;
> 
> "See the Bugzilla link for more details.

If it's not in the Changelog it doesn't exist. Patches should be self
contained and not refer to external sources for critical information.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-16 11:07             ` Peter Zijlstra
@ 2016-02-17 12:47               ` Joonas Lahtinen
  -1 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-17 12:47 UTC (permalink / raw)
  To: Peter Zijlstra, Oleg Nesterov
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > Quoting my original patch;
> > 
> > "See the Bugzilla link for more details.
> 
> If its not in the Changelog it doesn't exist. Patches should be self
> contained and not refer to external sources for critical information.

The exact locking case in CPUfreq drivers causing a splat is described
in the patch. Details were already included, that's why term "more
details" was used.

This is not exactly taking us closer to a fix, so I'd rather like to
discuss about the possibility of adding one more member to task_struct.
I added Oleg as CC to catch his attention to re-evaluate based on the
previous discussion in this thread.

Regards, Joonas

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 12:47               ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-17 12:47 UTC (permalink / raw)
  To: Peter Zijlstra, Oleg Nesterov
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > Quoting my original patch;
> > 
> > "See the Bugzilla link for more details.
> 
> If its not in the Changelog it doesn't exist. Patches should be self
> contained and not refer to external sources for critical information.

The exact locking case in CPUfreq drivers causing a splat is described
in the patch. Details were already included, that's why term "more
details" was used.

This is not exactly taking us closer to a fix, so I'd rather like to
discuss about the possibility of adding one more member to task_struct.
I added Oleg as CC to catch his attention to re-evaluate based on the
previous discussion in this thread.

Regards, Joonas

-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 12:47               ` Joonas Lahtinen
@ 2016-02-17 14:20                 ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 14:20 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > Quoting my original patch;
> > > 
> > > "See the Bugzilla link for more details.
> > 
> > If its not in the Changelog it doesn't exist. Patches should be self
> > contained and not refer to external sources for critical information.
> 
> The exact locking case in CPUfreq drivers causing a splat is described
> in the patch. Details were already included, that's why term "more
> details" was used.

Barely. What was not described was why you went to tinker with the
hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
is good.

> This is not exactly taking us closer to a fix, 

Why you think we can discuss fixes if you've not actually described your
problem is beyond me.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 14:20                 ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 14:20 UTC (permalink / raw)
  To: Joonas Lahtinen
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Oleg Nesterov, Linux kernel development, David Hildenbrand,
	Paul E. McKenney, Ingo Molnar

On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > Quoting my original patch;
> > > 
> > > "See the Bugzilla link for more details.
> > 
> > If its not in the Changelog it doesn't exist. Patches should be self
> > contained and not refer to external sources for critical information.
> 
> The exact locking case in CPUfreq drivers causing a splat is described
> in the patch. Details were already included, that's why term "more
> details" was used.

Barely. What was not described was why you went to tinker with the
hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
is good.

> This is not exactly taking us closer to a fix, 

Why you think we can discuss fixes if you've not actually described your
problem is beyond me.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 14:20                 ` Peter Zijlstra
@ 2016-02-17 16:13                   ` Daniel Vetter
  -1 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-17 16:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson, Daniel Vetter

On Wed, Feb 17, 2016 at 03:20:05PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> > On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > > Quoting my original patch;
> > > > 
> > > > "See the Bugzilla link for more details.
> > > 
> > > If it's not in the Changelog it doesn't exist. Patches should be self
> > > contained and not refer to external sources for critical information.
> > 
> > The exact locking case in CPUfreq drivers causing a splat is described
> > in the patch. Details were already included, that's why term "more
> > details" was used.
> 
> Barely. What was not described was why you went to tinker with the
> hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
> is good.
> 
> > This is not exactly taking us closer to a fix, 
> 
> Why you think we can discuss fixes if you've not actually described your
> problem is beyond me.

Can we please stop the petty-fights while figuring out how to work out a
solution here, thanks.

And for context we're hitting this on CI in a bunch of our machines, which
means no more lockdep checking for us. Which is, at least for me, pretty
serious, and why we're throwing complete cpu-anything newbies at that code
trying to come up with some solution to unblock our CI efforts for the
intel gfx driver. Unfortunately our attempts at just disabling lots of
Kconfig symbols proved futile, so ideas to avoid all that code highly
welcome.

As soon as CI stops hitting this we'll jump out of your inbox, if you want
so.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 16:13                   ` Daniel Vetter
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-17 16:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Oleg Nesterov, Linux kernel development, David Hildenbrand,
	Paul E. McKenney, Ingo Molnar

On Wed, Feb 17, 2016 at 03:20:05PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 02:47:31PM +0200, Joonas Lahtinen wrote:
> > On ti, 2016-02-16 at 12:07 +0100, Peter Zijlstra wrote:
> > > On Tue, Feb 16, 2016 at 12:51:03PM +0200, Joonas Lahtinen wrote:
> > > > Quoting my original patch;
> > > > 
> > > > "See the Bugzilla link for more details.
> > > 
> > > If it's not in the Changelog it doesn't exist. Patches should be self
> > > contained and not refer to external sources for critical information.
> > 
> > The exact locking case in CPUfreq drivers causing a splat is described
> > in the patch. Details were already included, that's why term "more
> > details" was used.
> 
> Barely. What was not described was why you went to tinker with the
> hotplug lock instead of sanitizing cpufreq. Nor why your chosen solution
> is good.
> 
> > This is not exactly taking us closer to a fix, 
> 
> Why you think we can discuss fixes if you've not actually described your
> problem is beyond me.

Can we please stop the petty-fights while figuring out how to work out a
solution here, thanks.

And for context we're hitting this on CI in a bunch of our machines, which
means no more lockdep checking for us. Which is, at least for me, pretty
serious, and why we're throwing complete cpu-anything newbies at that code
trying to come up with some solution to unblock our CI efforts for the
intel gfx driver. Unfortunately our attempts at just disabling lots of
Kconfig symbols proved futile, so ideas to avoid all that code highly
welcome.

As soon as CI stops hitting this we'll jump out of your inbox, if you want
so.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 16:13                   ` Daniel Vetter
@ 2016-02-17 16:14                     ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 16:14 UTC (permalink / raw)
  To: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> And for context we're hitting this on CI in a bunch of our machines, which

What's CI ?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 16:14                     ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 16:14 UTC (permalink / raw)
  To: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> And for context we're hitting this on CI in a bunch of our machines, which

What's CI ?
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 16:14                     ` Peter Zijlstra
@ 2016-02-17 16:33                       ` Daniel Vetter
  -1 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-17 16:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > And for context we're hitting this on CI in a bunch of our machines, which
> 
> What's CI ?

Continuous integration, aka our own farm of machines dedicated to running
i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
and also post-merge on our own little integration tree), but for just the
graphics team and our needs.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 16:33                       ` Daniel Vetter
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2016-02-17 16:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > And for context we're hitting this on CI in a bunch of our machines, which
> 
> What's CI ?

Continuous integration, aka our own farm of machines dedicated to running
i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
and also post-merge on our own little integration tree), but for just the
graphics team and our needs.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 16:33                       ` Daniel Vetter
@ 2016-02-17 16:37                         ` Peter Zijlstra
  -1 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 16:37 UTC (permalink / raw)
  To: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:33:51PM +0100, Daniel Vetter wrote:
> On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> > On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > > And for context we're hitting this on CI in a bunch of our machines, which
> > 
> > What's CI ?
> 
> Continuous integration, aka our own farm of machines dedicated to running
> i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
> and also post-merge on our own little integration tree), but for just the
> graphics team and our needs.

So what patch triggered this new issue? Did cpufreq change or what?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-17 16:37                         ` Peter Zijlstra
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Zijlstra @ 2016-02-17 16:37 UTC (permalink / raw)
  To: Joonas Lahtinen, Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On Wed, Feb 17, 2016 at 05:33:51PM +0100, Daniel Vetter wrote:
> On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> > On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > > And for context we're hitting this on CI in a bunch of our machines, which
> > 
> > What's CI ?
> 
> Continuous integration, aka our own farm of machines dedicated to running
> i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
> and also post-merge on our own little integration tree), but for just the
> graphics team and our needs.

So what patch triggered this new issue? Did cpufreq change or what?
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Intel-gfx] [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-17 16:37                         ` Peter Zijlstra
@ 2016-02-18 10:39                           ` Joonas Lahtinen
  -1 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-18 10:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Oleg Nesterov,
	Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

On ke, 2016-02-17 at 17:37 +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 05:33:51PM +0100, Daniel Vetter wrote:
> > On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> > > On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > > > And for context we're hitting this on CI in a bunch of our machines, which
> > > 
> > > What's CI ?
> > 
> > Continuous integration, aka our own farm of machines dedicated to running
> > i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
> > and also post-merge on our own little integration tree), but for just the
> > graphics team and our needs.
> 
> So what patch triggered this new issue? Did cpufreq change or what?

It appeared right after enabling lockdep debugging on the continuous
integration system. So we do not have a history of it not being there.

Taking another look at my code, it could indeed end up in a double-
wait-looping scenario if suspend and initialization was performed
simultaneously (it had a couple of other bugs too, fixed in v2).
Strange thing is, I think that should have been caught by cpuhp_lock_*
lockdep tracking.

So I'll move the discussion to linux-pm list to change the CPUfreq code. Thanks for the comments.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-18 10:39                           ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-18 10:39 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Oleg Nesterov, Linux kernel development, David Hildenbrand,
	Paul E. McKenney, Ingo Molnar

On ke, 2016-02-17 at 17:37 +0100, Peter Zijlstra wrote:
> On Wed, Feb 17, 2016 at 05:33:51PM +0100, Daniel Vetter wrote:
> > On Wed, Feb 17, 2016 at 05:14:57PM +0100, Peter Zijlstra wrote:
> > > On Wed, Feb 17, 2016 at 05:13:21PM +0100, Daniel Vetter wrote:
> > > > And for context we're hitting this on CI in a bunch of our machines, which
> > > 
> > > What's CI ?
> > 
> > Continuous integration, aka our own farm of machines dedicated to running
> > i915.ko testcases. Kinda like 0day (it does pre-merge testing on the m-l
> > and also post-merge on our own little integration tree), but for just the
> > graphics team and our needs.
> 
> So what patch triggered this new issue? Did cpufreq change or what?

It appeared right after enabling lockdep debugging on the continuous
integration system. So we do not have a history of it not being there.

Taking another look at my code, it could indeed end up in a double-
wait-looping scenario if suspend and initialization was performed
simultaneously (it had a couple of other bugs too, fixed in v2).
Strange thing is, I think that should have been caught by cpuhp_lock_*
lockdep tracking.

So I'll move the discussion to linux-pm list to change the CPUfreq code. Thanks for the comments.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
  2016-02-15 17:06     ` Peter Zijlstra
@ 2016-02-18 10:54       ` Joonas Lahtinen
  -1 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-18 10:54 UTC (permalink / raw)
  To: Peter Zijlstra, Oleg Nesterov
  Cc: Intel graphics driver community testing & development,
	Linux kernel development, Ingo Molnar, David Hildenbrand,
	Paul E. McKenney, Gautham R. Shenoy, Chris Wilson

Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > > policy->rwsem is locked during driver initialization and the functions called
> > > during init that actually apply CPU limits use get_online_cpus (because they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 
> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 

I do still agree the below would be a worthy change to proceed with.

CC'd Oleg here too to give a comment.

Regards, Joonas

> 
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
>  #ifdef CONFIG_HOTPLUG_CPU
>  /* Stop CPUs going up and down. */
>  
> +extern void cpu_hotplug_init_task(struct task_struct *p);
> +
>  extern void cpu_hotplug_begin(void);
>  extern void cpu_hotplug_done(void);
>  extern void get_online_cpus(void);
> @@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
>  
>  #else		/* CONFIG_HOTPLUG_CPU */
>  
> +static inline void cpu_hotplug_init_task(struct task_struct *p) {}
> +
>  static inline void cpu_hotplug_begin(void) {}
>  static inline void cpu_hotplug_done(void) {}
>  #define get_online_cpus()	do { } while (0)
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
>  	wait_queue_head_t	write_waitq;
>  };
>  
> +#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
> +static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);	\
> +static struct percpu_rw_semaphore name = {				\
> +	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
> +	.fast_read_ctr = &__percpu_rwsem_frc_##name,			\
> +	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
> +	.write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq),	\
> +}
> +
>  extern void percpu_down_read(struct percpu_rw_semaphore *);
>  extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
>  extern void percpu_up_read(struct percpu_rw_semaphore *);
> @@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
>  	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
>  })
>  
> -
>  #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
>  
> +#define percpu_rwsem_assert_held(sem)                          \
> +	lockdep_assert_held(&(sem)->rw_sem)
> +
>  static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
>  					bool read, unsigned long ip)
>  {
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1403,6 +1403,9 @@ struct task_struct {
>  	struct task_struct *last_wakee;
>  
>  	int wake_cpu;
> +#ifdef CONFIG_HOTPLUG_CPU
> +	int cpuhp_ref;
> +#endif
>  #endif
>  	int on_rq;
>  
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "smpboot.h"
>  
> @@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
>  
>  static RAW_NOTIFIER_HEAD(cpu_chain);
>  
> -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> +/*
> + * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
>   * Should always be manipulated under cpu_add_remove_lock
>   */
>  static int cpu_hotplug_disabled;
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  
> -static struct {
> -	struct task_struct *active_writer;
> -	/* wait queue to wake up the active_writer */
> -	wait_queue_head_t wq;
> -	/* verifies that no writer will get active while readers are active */
> -	struct mutex lock;
> -	/*
> -	 * Also blocks the new readers during
> -	 * an ongoing cpu hotplug operation.
> -	 */
> -	atomic_t refcount;
> -
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	struct lockdep_map dep_map;
> -#endif
> -} cpu_hotplug = {
> -	.active_writer = NULL,
> -	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
> -	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	.dep_map = {.name = "cpu_hotplug.lock" },
> -#endif
> -};
> -
> -/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
> -#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_acquire_tryread() \
> -				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
> +DEFINE_STATIC_PERCPU_RWSEM(hotplug);
>  
> +void cpu_hotplug_init_task(struct task_struct *p)
> +{
> +	if (WARN_ON_ONCE(p->cpuhp_ref))
> +		p->cpuhp_ref = 0;
> +}
>  
>  void get_online_cpus(void)
>  {
>  	might_sleep();
> -	if (cpu_hotplug.active_writer == current)
> +
> +	if (current->cpuhp_ref++) /* read recursion */
>  		return;
> -	cpuhp_lock_acquire_read();
> -	mutex_lock(&cpu_hotplug.lock);
> -	atomic_inc(&cpu_hotplug.refcount);
> -	mutex_unlock(&cpu_hotplug.lock);
> +
> +	percpu_down_read(&hotplug);
>  }
>  EXPORT_SYMBOL_GPL(get_online_cpus);
>  
>  void put_online_cpus(void)
>  {
> -	int refcount;
> -
> -	if (cpu_hotplug.active_writer == current)
> +	if (--current->cpuhp_ref)
>  		return;
>  
> -	refcount = atomic_dec_return(&cpu_hotplug.refcount);
> -	if (WARN_ON(refcount < 0)) /* try to fix things up */
> -		atomic_inc(&cpu_hotplug.refcount);
> -
> -	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
> -		wake_up(&cpu_hotplug.wq);
> -
> -	cpuhp_lock_release();
> -
> +	percpu_up_read(&hotplug);
>  }
>  EXPORT_SYMBOL_GPL(put_online_cpus);
>  
> -/*
> - * This ensures that the hotplug operation can begin only when the
> - * refcount goes to zero.
> - *
> - * Note that during a cpu-hotplug operation, the new readers, if any,
> - * will be blocked by the cpu_hotplug.lock
> - *
> - * Since cpu_hotplug_begin() is always called after invoking
> - * cpu_maps_update_begin(), we can be sure that only one writer is active.
> - *
> - * Note that theoretically, there is a possibility of a livelock:
> - * - Refcount goes to zero, last reader wakes up the sleeping
> - *   writer.
> - * - Last reader unlocks the cpu_hotplug.lock.
> - * - A new reader arrives at this moment, bumps up the refcount.
> - * - The writer acquires the cpu_hotplug.lock finds the refcount
> - *   non zero and goes to sleep again.
> - *
> - * However, this is very difficult to achieve in practice since
> - * get_online_cpus() not an api which is called all that often.
> - *
> - */
>  void cpu_hotplug_begin(void)
>  {
> -	DEFINE_WAIT(wait);
> -
> -	cpu_hotplug.active_writer = current;
> -	cpuhp_lock_acquire();
> -
> -	for (;;) {
> -		mutex_lock(&cpu_hotplug.lock);
> -		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> -		if (likely(!atomic_read(&cpu_hotplug.refcount)))
> -				break;
> -		mutex_unlock(&cpu_hotplug.lock);
> -		schedule();
> -	}
> -	finish_wait(&cpu_hotplug.wq, &wait);
> +	percpu_down_write(&hotplug);
> +	current->cpuhp_ref++; /* allow read-in-write recursion */
>  }
>  
>  void cpu_hotplug_done(void)
>  {
> -	cpu_hotplug.active_writer = NULL;
> -	mutex_unlock(&cpu_hotplug.lock);
> -	cpuhp_lock_release();
> +	current->cpuhp_ref--;
> +	percpu_up_write(&hotplug);
>  }
>  
>  /*
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1414,6 +1414,8 @@ static struct task_struct *copy_process(
>  	p->sequential_io_avg	= 0;
>  #endif
>  
> +	cpu_hotplug_init_task(p);
> +
>  	/* Perform scheduler related setup. Assign this task to a CPU. */
>  	retval = sched_fork(clone_flags, p);
>  	if (retval)
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -53,6 +53,11 @@ config GENERIC_IO
>  config STMP_DEVICE
>  	bool
>  
> +config PERCPU_RWSEM_HOTPLUG
> +	def_bool y
> +	depends on HOTPLUG_CPU
> +	select PERCPU_RWSEM
> +
>  config ARCH_USE_CMPXCHG_LOCKREF
>  	bool
>  
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting
@ 2016-02-18 10:54       ` Joonas Lahtinen
  0 siblings, 0 replies; 32+ messages in thread
From: Joonas Lahtinen @ 2016-02-18 10:54 UTC (permalink / raw)
  To: Peter Zijlstra, Oleg Nesterov
  Cc: Gautham R. Shenoy,
	Intel graphics driver community testing & development,
	Linux kernel development, David Hildenbrand, Paul E. McKenney,
	Ingo Molnar

Hi,

On ma, 2016-02-15 at 18:06 +0100, Peter Zijlstra wrote:
> On Mon, Feb 15, 2016 at 03:17:55PM +0100, Peter Zijlstra wrote:
> > On Mon, Feb 15, 2016 at 02:36:43PM +0200, Joonas Lahtinen wrote:
> > > Instead of implementing a custom locked reference counting, use lockref.
> > > 
> > > Current implementation leads to a deadlock splat on Intel SKL platforms
> > > when lockdep debugging is enabled.
> > > 
> > > This is due to few of CPUfreq drivers (including Intel P-state) having this;
> > > policy->rwsem is locked during driver initialization and the functions called
> > > during init that actually apply CPU limits use get_online_cpus (because they
> > > have other calling paths too), which will briefly lock cpu_hotplug.lock to
> > > increase cpu_hotplug.refcount.
> > > 
> > > On later calling path, when doing a suspend, when cpu_hotplug_begin() is called
> > > in disable_nonboot_cpus(), callbacks to CPUfreq functions get called after,
> > > which will lock policy->rwsem and cpu_hotplug.lock is already held by
> > > cpu_hotplug_begin() and we do have a potential deadlock scenario reported by
> > > our CI system (though it is a very unlikely one). See the Bugzilla link for more
> > > details.
> > 
> > I've been meaning to change the thing into a percpu-rwsem, I just
> > haven't had time to look into the lockdep splat that generated.
> 
> 
> The below has plenty lockdep issues because percpu-rwsem is
> reader-writer fair (like the regular rwsem), so it does throw up a fair
> number of very icky issues.
> 
> If at all possible, I'd really rather fix those and have a 'saner'
> hotplug lock, rather than muddle on with open-coded horror lock we have
> now.
> 

I do still agree the below would be a worthy change to proceed with.

CC'd Oleg here too to give a comment.

Regards, Joonas

> 
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -225,6 +225,8 @@ extern struct bus_type cpu_subsys;
>  #ifdef CONFIG_HOTPLUG_CPU
>  /* Stop CPUs going up and down. */
>  
> +extern void cpu_hotplug_init_task(struct task_struct *p);
> +
>  extern void cpu_hotplug_begin(void);
>  extern void cpu_hotplug_done(void);
>  extern void get_online_cpus(void);
> @@ -242,6 +244,8 @@ int cpu_down(unsigned int cpu);
>  
>  #else		/* CONFIG_HOTPLUG_CPU */
>  
> +static inline void cpu_hotplug_init_task(struct task_struct *p) {}
> +
>  static inline void cpu_hotplug_begin(void) {}
>  static inline void cpu_hotplug_done(void) {}
>  #define get_online_cpus()	do { } while (0)
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -16,6 +16,15 @@ struct percpu_rw_semaphore {
>  	wait_queue_head_t	write_waitq;
>  };
>  
> +#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
> +static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_frc_##name);	\
> +static struct percpu_rw_semaphore name = {				\
> +	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
> +	.fast_read_ctr = &__percpu_rwsem_frc_##name,			\
> +	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
> +	.write_waitq = __WAIT_QUEUE_HEAD_INITIALIZER(name.write_waitq),	\
> +}
> +
>  extern void percpu_down_read(struct percpu_rw_semaphore *);
>  extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
>  extern void percpu_up_read(struct percpu_rw_semaphore *);
> @@ -33,9 +42,11 @@ extern void percpu_free_rwsem(struct per
>  	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
>  })
>  
> -
>  #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
>  
> +#define percpu_rwsem_assert_held(sem)                          \
> +	lockdep_assert_held(&(sem)->rw_sem)
> +
>  static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
>  					bool read, unsigned long ip)
>  {
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1403,6 +1403,9 @@ struct task_struct {
>  	struct task_struct *last_wakee;
>  
>  	int wake_cpu;
> +#ifdef CONFIG_HOTPLUG_CPU
> +	int cpuhp_ref;
> +#endif
>  #endif
>  	int on_rq;
>  
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "smpboot.h"
>  
> @@ -51,121 +52,52 @@ EXPORT_SYMBOL(cpu_notifier_register_done
>  
>  static RAW_NOTIFIER_HEAD(cpu_chain);
>  
> -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> +/*
> + * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
>   * Should always be manipulated under cpu_add_remove_lock
>   */
>  static int cpu_hotplug_disabled;
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  
> -static struct {
> -	struct task_struct *active_writer;
> -	/* wait queue to wake up the active_writer */
> -	wait_queue_head_t wq;
> -	/* verifies that no writer will get active while readers are active */
> -	struct mutex lock;
> -	/*
> -	 * Also blocks the new readers during
> -	 * an ongoing cpu hotplug operation.
> -	 */
> -	atomic_t refcount;
> -
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	struct lockdep_map dep_map;
> -#endif
> -} cpu_hotplug = {
> -	.active_writer = NULL,
> -	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
> -	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	.dep_map = {.name = "cpu_hotplug.lock" },
> -#endif
> -};
> -
> -/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
> -#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_acquire_tryread() \
> -				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
> -#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
> +DEFINE_STATIC_PERCPU_RWSEM(hotplug);
>  
> +void cpu_hotplug_init_task(struct task_struct *p)
> +{
> +	if (WARN_ON_ONCE(p->cpuhp_ref))
> +		p->cpuhp_ref = 0;
> +}
>  
>  void get_online_cpus(void)
>  {
>  	might_sleep();
> -	if (cpu_hotplug.active_writer == current)
> +
> +	if (current->cpuhp_ref++) /* read recursion */
>  		return;
> -	cpuhp_lock_acquire_read();
> -	mutex_lock(&cpu_hotplug.lock);
> -	atomic_inc(&cpu_hotplug.refcount);
> -	mutex_unlock(&cpu_hotplug.lock);
> +
> +	percpu_down_read(&hotplug);
>  }
>  EXPORT_SYMBOL_GPL(get_online_cpus);
>  
>  void put_online_cpus(void)
>  {
> -	int refcount;
> -
> -	if (cpu_hotplug.active_writer == current)
> +	if (--current->cpuhp_ref)
>  		return;
>  
> -	refcount = atomic_dec_return(&cpu_hotplug.refcount);
> -	if (WARN_ON(refcount < 0)) /* try to fix things up */
> -		atomic_inc(&cpu_hotplug.refcount);
> -
> -	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
> -		wake_up(&cpu_hotplug.wq);
> -
> -	cpuhp_lock_release();
> -
> +	percpu_up_read(&hotplug);
>  }
>  EXPORT_SYMBOL_GPL(put_online_cpus);
>  
> -/*
> - * This ensures that the hotplug operation can begin only when the
> - * refcount goes to zero.
> - *
> - * Note that during a cpu-hotplug operation, the new readers, if any,
> - * will be blocked by the cpu_hotplug.lock
> - *
> - * Since cpu_hotplug_begin() is always called after invoking
> - * cpu_maps_update_begin(), we can be sure that only one writer is active.
> - *
> - * Note that theoretically, there is a possibility of a livelock:
> - * - Refcount goes to zero, last reader wakes up the sleeping
> - *   writer.
> - * - Last reader unlocks the cpu_hotplug.lock.
> - * - A new reader arrives at this moment, bumps up the refcount.
> - * - The writer acquires the cpu_hotplug.lock finds the refcount
> - *   non zero and goes to sleep again.
> - *
> - * However, this is very difficult to achieve in practice since
> - * get_online_cpus() not an api which is called all that often.
> - *
> - */
>  void cpu_hotplug_begin(void)
>  {
> -	DEFINE_WAIT(wait);
> -
> -	cpu_hotplug.active_writer = current;
> -	cpuhp_lock_acquire();
> -
> -	for (;;) {
> -		mutex_lock(&cpu_hotplug.lock);
> -		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> -		if (likely(!atomic_read(&cpu_hotplug.refcount)))
> -				break;
> -		mutex_unlock(&cpu_hotplug.lock);
> -		schedule();
> -	}
> -	finish_wait(&cpu_hotplug.wq, &wait);
> +	percpu_down_write(&hotplug);
> +	current->cpuhp_ref++; /* allow read-in-write recursion */
>  }
>  
>  void cpu_hotplug_done(void)
>  {
> -	cpu_hotplug.active_writer = NULL;
> -	mutex_unlock(&cpu_hotplug.lock);
> -	cpuhp_lock_release();
> +	current->cpuhp_ref--;
> +	percpu_up_write(&hotplug);
>  }
>  
>  /*
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1414,6 +1414,8 @@ static struct task_struct *copy_process(
>  	p->sequential_io_avg	= 0;
>  #endif
>  
> +	cpu_hotplug_init_task(p);
> +
>  	/* Perform scheduler related setup. Assign this task to a CPU. */
>  	retval = sched_fork(clone_flags, p);
>  	if (retval)
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -53,6 +53,11 @@ config GENERIC_IO
>  config STMP_DEVICE
>  	bool
>  
> +config PERCPU_RWSEM_HOTPLUG
> +	def_bool y
> +	depends on HOTPLUG_CPU
> +	select PERCPU_RWSEM
> +
>  config ARCH_USE_CMPXCHG_LOCKREF
>  	bool
>  
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-02-18 10:54 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-15 12:36 [PATCH] [RFC] kernel/cpu: Use lockref for online CPU reference counting Joonas Lahtinen
2016-02-15 12:36 ` Joonas Lahtinen
2016-02-15 14:17 ` Peter Zijlstra
2016-02-15 14:17   ` Peter Zijlstra
2016-02-15 17:06   ` Peter Zijlstra
2016-02-15 17:06     ` Peter Zijlstra
2016-02-16  8:49     ` Joonas Lahtinen
2016-02-16  8:49       ` Joonas Lahtinen
2016-02-16  9:14       ` Peter Zijlstra
2016-02-16  9:14         ` Peter Zijlstra
2016-02-16 10:51         ` Joonas Lahtinen
2016-02-16 10:51           ` Joonas Lahtinen
2016-02-16 11:07           ` Peter Zijlstra
2016-02-16 11:07             ` Peter Zijlstra
2016-02-17 12:47             ` Joonas Lahtinen
2016-02-17 12:47               ` Joonas Lahtinen
2016-02-17 14:20               ` Peter Zijlstra
2016-02-17 14:20                 ` Peter Zijlstra
2016-02-17 16:13                 ` Daniel Vetter
2016-02-17 16:13                   ` Daniel Vetter
2016-02-17 16:14                   ` Peter Zijlstra
2016-02-17 16:14                     ` Peter Zijlstra
2016-02-17 16:33                     ` [Intel-gfx] " Daniel Vetter
2016-02-17 16:33                       ` Daniel Vetter
2016-02-17 16:37                       ` Peter Zijlstra
2016-02-17 16:37                         ` Peter Zijlstra
2016-02-18 10:39                         ` [Intel-gfx] " Joonas Lahtinen
2016-02-18 10:39                           ` Joonas Lahtinen
2016-02-18 10:54     ` Joonas Lahtinen
2016-02-18 10:54       ` Joonas Lahtinen
2016-02-15 17:18   ` [Intel-gfx] " Daniel Vetter
2016-02-15 17:18     ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.