All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>,
	Byungchul Park <byungchul.park@lge.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	lkml <linux-kernel@vger.kernel.org>
Subject: Re: WARNING: possible circular locking dependency detected
Date: Mon, 28 Aug 2017 09:41:38 +0200	[thread overview]
Message-ID: <20170828074138.6zeceyffasjmaazx@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20170825144755.ms2h2j2xe6gznnqi@linutronix.de>

On Fri, Aug 25, 2017 at 04:47:55PM +0200, Sebastian Andrzej Siewior wrote:
> On 2017-08-25 12:03:04 [+0200], Borislav Petkov wrote:
> > ======================================================
> > WARNING: possible circular locking dependency detected
> > 4.13.0-rc6+ #1 Not tainted
> > ------------------------------------------------------
> 
> While looking at this, I stumbled upon another one also enabled by
> "completion annotation" in the TIP:
> 
> | ======================================================
> | WARNING: possible circular locking dependency detected
> | 4.13.0-rc6-00758-gd80d4177391f-dirty #112 Not tainted
> | ------------------------------------------------------
> | cpu-off.sh/426 is trying to acquire lock:
> |  ((complete)&st->done){+.+.}, at: [<ffffffff810cb344>] takedown_cpu+0x84/0xf0
> |
> | but task is already holding lock:
> |  (sparse_irq_lock){+.+.}, at: [<ffffffff811220f2>] irq_lock_sparse+0x12/0x20
> |
> | which lock already depends on the new lock.
> |
> | the existing dependency chain (in reverse order) is:
> |
> | -> #1 (sparse_irq_lock){+.+.}:
> |        __mutex_lock+0x88/0x9a0
> |        mutex_lock_nested+0x16/0x20
> |        irq_lock_sparse+0x12/0x20
> |        irq_affinity_online_cpu+0x13/0xd0
> |        cpuhp_invoke_callback+0x4a/0x130
> |
> | -> #0 ((complete)&st->done){+.+.}:
> |        check_prev_add+0x351/0x700
> |        __lock_acquire+0x114a/0x1220
> |        lock_acquire+0x47/0x70
> |        wait_for_completion+0x5c/0x180
> |        takedown_cpu+0x84/0xf0
> |        cpuhp_invoke_callback+0x4a/0x130
> |        cpuhp_down_callbacks+0x3d/0x80
> …
> |
> | other info that might help us debug this:
> |
> |  Possible unsafe locking scenario:
> |        CPU0                    CPU1
> |        ----                    ----
> |   lock(sparse_irq_lock);
> |                                lock((complete)&st->done);
> |                                lock(sparse_irq_lock);
> |   lock((complete)&st->done);
> |
> |  *** DEADLOCK ***
> 
> We hold sparse_irq_lock while waiting for the completion in the
> CPU-down case, and in the CPU-up case we acquire sparse_irq_lock
> while the other CPU is waiting for the completion.
> This is not an issue if my interpretation of lockdep here is correct.
> 
> How do we annotate this?

Does something like so work?

---
 include/linux/completion.h | 15 ++++++++++++---
 kernel/kthread.c           | 14 +++++++++++++-
 kernel/sched/completion.c  | 18 +++++++++++++-----
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 791f053f28b7..0eccd2d44c85 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -34,9 +34,9 @@ struct completion {
 };
 
 #ifdef CONFIG_LOCKDEP_COMPLETIONS
-static inline void complete_acquire(struct completion *x)
+static inline void complete_acquire(struct completion *x, int subclass)
 {
-	lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
+	lock_acquire_exclusive((struct lockdep_map *)&x->map, subclass, 0, NULL, _RET_IP_);
 }
 
 static inline void complete_release(struct completion *x)
@@ -59,7 +59,7 @@ do {									\
 } while (0)
 #else
 #define init_completion(x) __init_completion(x)
-static inline void complete_acquire(struct completion *x) {}
+static inline void complete_acquire(struct completion *x, int subclass) {}
 static inline void complete_release(struct completion *x) {}
 static inline void complete_release_commit(struct completion *x) {}
 #endif
@@ -132,6 +132,15 @@ static inline void reinit_completion(struct completion *x)
 }
 
 extern void wait_for_completion(struct completion *);
+
+#ifndef CONFIG_LOCKDEP
+static inline void
+wait_for_completion_nested(struct completion *x, int subclass)
+{
+	wait_for_completion(x);
+}
+#endif
+
 extern void wait_for_completion_io(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 26db528c1d88..6092702dd908 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -485,7 +485,19 @@ int kthread_park(struct task_struct *k)
 		set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 		if (k != current) {
 			wake_up_process(k);
-			wait_for_completion(&kthread->parked);
+			/*
+			 * CPU-UP		CPU-DOWN
+			 *
+			 * cpu_hotplug_lock
+			 * wait_for_completion()
+			 * 			cpu_hotplug_lock
+			 * 			complete()
+			 *
+			 * Which normally spells deadlock, except of course
+			 * that up and down are globally serialized so the
+			 * above cannot in fact happen concurrently.
+			 */
+			wait_for_completion_nested(&kthread->parked, 1);
 		}
 	}
 
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index cc873075c3bd..18ca9b7ef677 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -101,11 +101,11 @@ do_wait_for_common(struct completion *x,
 
 static inline long __sched
 __wait_for_common(struct completion *x,
-		  long (*action)(long), long timeout, int state)
+		  long (*action)(long), long timeout, int state, int subclass)
 {
 	might_sleep();
 
-	complete_acquire(x);
+	complete_acquire(x, subclass);
 
 	spin_lock_irq(&x->wait.lock);
 	timeout = do_wait_for_common(x, action, timeout, state);
@@ -117,9 +117,9 @@ __wait_for_common(struct completion *x,
 }
 
 static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+wait_for_common(struct completion *x, long timeout, int state, int subclass)
 {
-	return __wait_for_common(x, schedule_timeout, timeout, state);
+	return __wait_for_common(x, schedule_timeout, timeout, state, subclass);
 }
 
 static long __sched
@@ -140,10 +140,18 @@ wait_for_common_io(struct completion *x, long timeout, int state)
  */
 void __sched wait_for_completion(struct completion *x)
 {
-	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE, 0);
 }
 EXPORT_SYMBOL(wait_for_completion);
 
+#ifdef CONFIG_LOCKDEP
+void __sched wait_for_completion_nested(struct completion *x, int subclass)
+{
+	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE, subclass);
+}
+EXPORT_SYMBOL(wait_for_completion_nested);
+#endif
+
 /**
  * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
  * @x:  holds the state of this particular completion

  parent reply	other threads:[~2017-08-28  7:41 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-25 10:03 WARNING: possible circular locking dependency detected Borislav Petkov
2017-08-25 11:45 ` Borislav Petkov
2017-08-25 14:47 ` Sebastian Andrzej Siewior
2017-08-25 16:12   ` Byungchul Park
2017-08-25 16:21     ` Thomas Gleixner
2017-08-28  7:41   ` Peter Zijlstra [this message]
2017-08-28 14:11   ` Peter Zijlstra
2017-08-29 19:34   ` Peter Zijlstra
2017-08-25 16:42 ` Sebastian Andrzej Siewior
2017-08-28 14:58 ` Peter Zijlstra
2017-08-28 15:06   ` Peter Zijlstra
2017-08-28 16:32     ` Peter Zijlstra
2017-08-29 17:40   ` Thomas Gleixner
2017-08-29 19:49     ` Peter Zijlstra
2017-08-29 20:10       ` Thomas Gleixner
2017-08-30  5:47         ` Peter Zijlstra
2017-08-31  7:08           ` Thomas Gleixner
2017-08-31  7:37             ` Peter Zijlstra
2017-08-31  7:55               ` Thomas Gleixner
2017-08-31  8:09                 ` Peter Zijlstra
2017-08-31  8:15                   ` Thomas Gleixner
2017-08-31 21:24                     ` Thomas Gleixner
2017-09-01 20:32                       ` Peter Zijlstra
2018-11-14  2:41 Qian Cai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170828074138.6zeceyffasjmaazx@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=bp@alien8.de \
    --cc=byungchul.park@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.