linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Vegard Nossum <vegard.nossum@gmail.com>,
	Ingo Molnar <mingo@kernel.org>,
	Dave Jones <davej@codemonkey.org.uk>, Chris Mason <clm@fb.com>,
	Jens Axboe <axboe@fb.com>, Andy Lutomirski <luto@amacapital.net>,
	Andy Lutomirski <luto@kernel.org>,
	Al Viro <viro@zeniv.linux.org.uk>, Josef Bacik <jbacik@fb.com>,
	David Sterba <dsterba@suse.com>,
	linux-btrfs <linux-btrfs@vger.kernel.org>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	Dave Chinner <david@fromorbit.com>
Subject: Re: bio linked list corruption.
Date: Tue, 6 Dec 2016 09:16:59 +0100	[thread overview]
Message-ID: <20161206081659.GV3092@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <CA+55aFwC3nfmum6oPoBRU325CKjK-g3MuqW+bhq3+9DnE_4fgw@mail.gmail.com>

On Mon, Dec 05, 2016 at 12:35:52PM -0800, Linus Torvalds wrote:
> Adding the scheduler people to the participants list, and re-attaching
> the patch, because while this patch is internal to the VM code, the
> issue itself is not.
> 
> There might well be other cases where somebody goes "wake_up_all()"
> will wake everybody up, so I can put the wait queue head on the stack,
> and then after I've woken people up I can return".
> 
> Ingo/PeterZ: the reason that does *not* work is that "wake_up_all()"
> does make sure that everybody is woken up, but the usual autoremove
> wake function only removes the wakeup entry if the process was woken
> up by that particular wakeup. If something else had woken it up, the
> entry remains on the list, and the waker in this case returned with
> the wait head still containing entries.
> 
> Which is deadly when the wait queue head is on the stack.

Yes, very much so.

> So I'm wondering if we should make that "synchronous_wake_function()"
> available generally, and maybe introduce a DEFINE_WAIT_SYNC() helper
> that uses it.

We could also do some debug code that tracks the ONSTACK ness and warns
on autoremove. Something like the below, equally untested.

> 
> Of course, I'm really hoping that this shmem.c use is the _only_ such
> case.  But I doubt it.

$ git grep DECLARE_WAIT_QUEUE_HEAD_ONSTACK | wc -l
28

---


diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2408e8d5c05c..199faaa89847 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -39,6 +39,9 @@ struct wait_bit_queue {
 struct __wait_queue_head {
 	spinlock_t		lock;
 	struct list_head	task_list;
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	int			onstack;
+#endif
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -56,9 +59,18 @@ struct task_struct;
 #define DECLARE_WAITQUEUE(name, tsk)					\
 	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
 
-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
+#ifdef CONFIG_DEBUG_WAITQUEUE
+#define ___WAIT_QUEUE_ONSTACK(onstack)	.onstack = (onstack),
+#else
+#define ___WAIT_QUEUE_ONSTACK(onstack)
+#endif
+
+#define ___WAIT_QUEUE_HEAD_INITIALIZER(name, onstack)	{		\
 	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+	.task_list	= { &(name).task_list, &(name).task_list },	\
+	___WAIT_QUEUE_ONSTACK(onstack) }
+
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) ___WAIT_QUEUE_HEAD_INITIALIZER(name, 0)
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -80,11 +92,12 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
-	({ init_waitqueue_head(&name); name; })
+	({ init_waitqueue_head(&name); (name).onstack = 1; name; })
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
 #else
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
+# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
+	wait_queue_head_t name = ___WAIT_QUEUE_HEAD_INITIALIZER(name, 1)
 #endif
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 9453efe9b25a..746d00117d08 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -156,6 +156,13 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
+static inline void prepare_debug(wait_queue_head_t *q, wait_queue_t *wait)
+{
+#ifdef CONFIG_DEBUG_WAITQUEUE
+	WARN_ON_ONCE(q->onstack && wait->func == autoremove_wake_function)
+#endif
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
@@ -178,6 +185,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue(q, wait);
 	set_current_state(state);
+	prepare_debug(q, wait);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -192,6 +200,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	if (list_empty(&wait->task_list))
 		__add_wait_queue_tail(q, wait);
 	set_current_state(state);
+	prepare_debug(q, wait);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
@@ -235,6 +244,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 		}
 		set_current_state(state);
 	}
+	prepare_debug(q, wait);
 	spin_unlock_irqrestore(&q->lock, flags);
 
 	return ret;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9bb7d825ba14..af2ef22a5b2b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1235,6 +1235,14 @@ config DEBUG_PI_LIST
 
 	  If unsure, say N.
 
+config DEBUG_WAITQUEUE
+	bool "Debug waitqueue"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to do sanity checking on waitqueue usage
+
+	  If unsure, say N.
+
 config DEBUG_SG
 	bool "Debug SG table operations"
 	depends on DEBUG_KERNEL

  parent reply	other threads:[~2016-12-06  8:18 UTC|newest]

Thread overview: 118+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-11 14:45 btrfs bio linked list corruption Dave Jones
2016-10-11 15:11 ` Al Viro
2016-10-11 15:19   ` Dave Jones
2016-10-11 15:20     ` Chris Mason
2016-10-11 15:49       ` Dave Jones
2016-10-11 15:54 ` Chris Mason
2016-10-11 16:25   ` Dave Jones
2016-10-12 13:47   ` Dave Jones
2016-10-12 14:40     ` Dave Jones
2016-10-12 14:42       ` Chris Mason
2016-10-13 18:16         ` Dave Jones
2016-10-13 21:18           ` Chris Mason
2016-10-13 21:56             ` Dave Jones
2016-10-16  0:42             ` Dave Jones
2016-10-18  1:07               ` Chris Mason
2016-10-18 22:42 ` Dave Jones
2016-10-18 23:12   ` Jens Axboe
2016-10-18 23:31     ` Chris Mason
2016-10-18 23:36       ` Jens Axboe
2016-10-18 23:39       ` Linus Torvalds
2016-10-18 23:42         ` Chris Mason
2016-10-19  0:10           ` Linus Torvalds
2016-10-19  0:19             ` Chris Mason
2016-10-19  0:28             ` Linus Torvalds
2016-10-20 22:48               ` Dave Jones
2016-10-19  1:05             ` Andy Lutomirski
2016-10-20 22:50               ` Dave Jones
2016-10-20 23:01                 ` Andy Lutomirski
2016-10-20 23:03                   ` Dave Jones
2016-10-20 23:23                     ` Andy Lutomirski
2016-10-21 20:02                       ` Dave Jones
2016-10-21 20:17                         ` Chris Mason
2016-10-21 20:23                           ` Dave Jones
2016-10-21 20:38                             ` Chris Mason
2016-10-21 20:41                               ` Josef Bacik
2016-10-21 21:11                                 ` Dave Jones
2016-10-22 15:20                         ` Dave Jones
2016-10-23 21:32                           ` Chris Mason
2016-10-24  4:40                             ` Dave Jones
2016-10-24 13:42                               ` Chris Mason
2016-10-26  0:27                                 ` Dave Jones
2016-10-26  1:33                                   ` Linus Torvalds
2016-10-26  1:39                                     ` Linus Torvalds
2016-10-26 16:30                                       ` Dave Jones
2016-10-26 16:48                                         ` Linus Torvalds
2016-10-26 18:18                                           ` Dave Jones
2016-10-26 18:42                                           ` Dave Jones
2016-10-26 19:06                                             ` Linus Torvalds
2016-10-26 20:00                                               ` Chris Mason
2016-10-26 21:52                                                 ` Chris Mason
2016-10-26 22:21                                                   ` Linus Torvalds
2016-10-26 22:40                                                     ` Dave Jones
2016-10-26 22:51                                                       ` Linus Torvalds
2016-10-26 22:55                                                         ` Jens Axboe
2016-10-26 22:58                                                         ` Linus Torvalds
2016-10-26 23:03                                                           ` Jens Axboe
2016-10-26 23:07                                                             ` Dave Jones
2016-10-26 23:08                                                             ` Linus Torvalds
2016-10-26 23:20                                                               ` Jens Axboe
2016-10-26 23:38                                                                 ` Chris Mason
2016-10-26 23:47                                                                   ` Dave Jones
2016-10-27  0:00                                                                     ` Jens Axboe
2016-10-27 13:33                                                                       ` Chris Mason
2016-10-31 18:55                                                                     ` Dave Jones
2016-10-31 19:35                                                                       ` Linus Torvalds
2016-10-31 19:44                                                                         ` Chris Mason
2016-11-06 16:55                                                                           ` btrfs btree_ctree_super fault Dave Jones
2016-11-08 14:59                                                                             ` Dave Jones
2016-11-08 15:08                                                                               ` Chris Mason
2016-11-10 14:35                                                                                 ` Dave Jones
2016-11-10 15:27                                                                                   ` Chris Mason
2016-11-23 19:34                                                                           ` bio linked list corruption Dave Jones
2016-11-23 19:58                                                                             ` Dave Jones
2016-12-01 15:32                                                                               ` btrfs_destroy_inode warn (outstanding extents) Dave Jones
2016-12-03 16:48                                                                                 ` Dave Jones
2016-12-07 16:15                                                                                   ` Dave Jones
2016-12-09 21:12                                                                                 ` Steven Rostedt
2016-12-04 23:04                                                                               ` bio linked list corruption Vegard Nossum
2016-12-05 11:10                                                                                 ` Vegard Nossum
2016-12-05 17:09                                                                                   ` Vegard Nossum
2016-12-05 17:21                                                                                     ` Dave Jones
2016-12-05 17:55                                                                                     ` Linus Torvalds
2016-12-05 19:11                                                                                       ` Vegard Nossum
2016-12-05 20:10                                                                                         ` Linus Torvalds
2016-12-05 20:35                                                                                           ` Linus Torvalds
2016-12-05 21:33                                                                                             ` Vegard Nossum
2016-12-06  8:42                                                                                               ` Vegard Nossum
2016-12-06  8:16                                                                                             ` Peter Zijlstra [this message]
2016-12-06  8:36                                                                                               ` Ingo Molnar
2016-12-06 16:33                                                                                               ` Linus Torvalds
2016-12-05 20:10                                                                                         ` Vegard Nossum
2016-12-05 18:11                                                                                 ` Andy Lutomirski
2016-12-05 18:25                                                                                   ` Linus Torvalds
2016-12-05 18:26                                                                                   ` Vegard Nossum
2016-10-26 23:19                                                             ` Chris Mason
2016-10-26 23:21                                                               ` Jens Axboe
2016-10-27  6:33                                                             ` Christoph Hellwig
2016-10-27 16:34                                                               ` Linus Torvalds
2016-10-27 16:36                                                                 ` Jens Axboe
2016-10-26 23:01                                                         ` Dave Jones
2016-10-26 23:05                                                           ` Jens Axboe
2016-10-26 22:52                                                       ` Jens Axboe
2016-10-26 22:07                                                 ` Linus Torvalds
2016-10-26 22:54                                                   ` Chris Mason
2016-10-27  5:41                                   ` Dave Chinner
2016-10-27 17:23                                     ` Dave Jones
2016-10-24 20:06                               ` Andy Lutomirski
2016-10-24 20:46                                 ` Linus Torvalds
2016-10-24 21:17                                   ` Linus Torvalds
2016-10-24 21:50                                     ` Linus Torvalds
2016-10-24 22:02                                       ` Chris Mason
2016-10-24 22:42                                   ` Andy Lutomirski
2016-10-25  0:00                                     ` Linus Torvalds
2016-10-25  1:09                                       ` Andy Lutomirski
2016-10-19 17:09           ` Philipp Hahn
2016-10-19 17:43             ` Linus Torvalds
2016-10-20  6:52               ` Ingo Molnar
2016-10-20  7:17                 ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161206081659.GV3092@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=axboe@fb.com \
    --cc=clm@fb.com \
    --cc=davej@codemonkey.org.uk \
    --cc=david@fromorbit.com \
    --cc=dsterba@suse.com \
    --cc=jbacik@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vegard.nossum@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).