linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH (resend) 0/2] workqueue lockup debugging
@ 2007-07-17 12:53 Johannes Berg
  2007-07-17 12:53 ` [PATCH (resend) 1/2] workqueue: debug flushing deadlocks with lockdep Johannes Berg
  2007-07-17 12:53 ` [PATCH (resend) 2/2] workqueue: debug work related " Johannes Berg
  0 siblings, 2 replies; 4+ messages in thread
From: Johannes Berg @ 2007-07-17 12:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Oleg Nesterov, Ingo Molnar, Peter Zijlstra

I recently experienced a lockup in the wireless code caused by the
scenario described in patch 1. I wanted lockdep to warn about such
scenarios, both to catch the same kind of bug in other subsystems
and to make sure we never run into it again.

In discussions with Oleg and Ingo it turned out that there's
another possible deadlock with the now-recommended API of
cancel_work_sync() and I also found a way to make lockdep
catch that; it is addressed by patch 2.

Ingo thinks this could also have caught a similar lockup in the
DVB subsystem and would like to see it in 2.6.23.

johannes


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH (resend) 1/2] workqueue: debug flushing deadlocks with lockdep
  2007-07-17 12:53 [PATCH (resend) 0/2] workqueue lockup debugging Johannes Berg
@ 2007-07-17 12:53 ` Johannes Berg
  2007-07-18 17:28   ` [PATCH (updated) " Johannes Berg
  2007-07-17 12:53 ` [PATCH (resend) 2/2] workqueue: debug work related " Johannes Berg
  1 sibling, 1 reply; 4+ messages in thread
From: Johannes Berg @ 2007-07-17 12:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Oleg Nesterov, Ingo Molnar, Peter Zijlstra

[-- Attachment #1: workqueue-debug-1.patch --]
[-- Type: text/plain, Size: 4423 bytes --]

In the following scenario:

code path 1:
  my_function() -> lock(L1); ...; flush_workqueue(); ...

code path 2:
  run_workqueue() -> my_work() -> ...; lock(L1); ...

you can get a deadlock when my_work() is queued or running
but my_function() has acquired L1 already.

This patch adds a pseudo-lock to each workqueue to make lockdep
warn about this scenario.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

---
 include/linux/workqueue.h |   20 +++++++++++++++++---
 kernel/workqueue.c        |   20 +++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

--- wireless-dev.orig/kernel/workqueue.c	2007-07-17 13:45:31.170964463 +0200
+++ wireless-dev/kernel/workqueue.c	2007-07-17 14:52:07.980964463 +0200
@@ -32,6 +32,7 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -257,7 +261,9 @@ static void run_workqueue(struct cpu_wor
 
 		BUG_ON(get_wq_data(work) != cwq);
 		work_clear_pending(work);
+		lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 		f(work);
+		lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -376,6 +382,8 @@ void fastcall flush_workqueue(struct wor
 	int cpu;
 
 	might_sleep();
+	lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&wq->lockdep_map, 1, _THIS_IP_);
 	for_each_cpu_mask(cpu, *cpu_map)
 		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
@@ -695,8 +703,10 @@ static void start_workqueue_thread(struc
 	}
 }
 
-struct workqueue_struct *__create_workqueue(const char *name,
-					    int singlethread, int freezeable)
+struct workqueue_struct *__create_workqueue_key(const char *name,
+						int singlethread,
+						int freezeable,
+						struct lock_class_key *key)
 {
 	struct workqueue_struct *wq;
 	struct cpu_workqueue_struct *cwq;
@@ -713,6 +723,7 @@ struct workqueue_struct *__create_workqu
 	}
 
 	wq->name = name;
+	lockdep_init_map(&wq->lockdep_map, name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
 	INIT_LIST_HEAD(&wq->list);
@@ -741,7 +752,7 @@ struct workqueue_struct *__create_workqu
 	}
 	return wq;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue);
+EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
@@ -752,6 +763,9 @@ static void cleanup_workqueue_thread(str
 	if (cwq->thread == NULL)
 		return;
 
+	lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
 	/*
 	 * If the caller is CPU_DEAD the single flush_cpu_workqueue()
 	 * is not enough, a concurrent flush_workqueue() can insert a
--- wireless-dev.orig/include/linux/workqueue.h	2007-07-17 13:45:29.920964463 +0200
+++ wireless-dev/include/linux/workqueue.h	2007-07-17 14:52:07.890964463 +0200
@@ -118,9 +118,23 @@ struct execute_work {
 	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
-extern struct workqueue_struct *__create_workqueue(const char *name,
-						    int singlethread,
-						    int freezeable);
+extern struct workqueue_struct *
+__create_workqueue_key(const char *name, int singlethread,
+		       int freezeable, struct lock_class_key *key);
+
+#ifdef CONFIG_LOCKDEP
+#define __create_workqueue(name, singlethread, freezeable)	\
+({								\
+	static struct lock_class_key __key;			\
+								\
+	__create_workqueue_key((name), (singlethread),		\
+			       (freezeable), &__key);		\
+})
+#else
+#define __create_workqueue(name, singlethread, freezeable)	\
+	__create_workqueue_key((name), (singlethread), (freezeable), NULL)
+#endif
+
 #define create_workqueue(name) __create_workqueue((name), 0, 0)
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)

-- 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH (resend) 2/2] workqueue: debug work related deadlocks with lockdep
  2007-07-17 12:53 [PATCH (resend) 0/2] workqueue lockup debugging Johannes Berg
  2007-07-17 12:53 ` [PATCH (resend) 1/2] workqueue: debug flushing deadlocks with lockdep Johannes Berg
@ 2007-07-17 12:53 ` Johannes Berg
  1 sibling, 0 replies; 4+ messages in thread
From: Johannes Berg @ 2007-07-17 12:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Oleg Nesterov, Ingo Molnar, Peter Zijlstra

[-- Attachment #1: workqueue-debug-2.patch --]
[-- Type: text/plain, Size: 4816 bytes --]

In the following scenario:

code path 1:
  my_function() -> lock(L1); ...; cancel_work_sync(my_work)
  [or cancel_rearming_delayed_work(my_work)]

code path 2:
  run_workqueue() -> my_work.f() -> ...; lock(L1); ...

you can get a deadlock if my_work.f() is running but my_function()
has acquired L1 already. This patch adds a pseudo-lock to each
struct work_struct to make lockdep warn about this scenario.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

---
 include/linux/lockdep.h   |    8 ++++++++
 include/linux/workqueue.h |   29 +++++++++++++++++++++++++++++
 kernel/workqueue.c        |   16 ++++++++++++++++
 3 files changed, 53 insertions(+)

--- linux-2.6-git.orig/include/linux/workqueue.h	2007-07-05 13:01:33.978155045 +0200
+++ linux-2.6-git/include/linux/workqueue.h	2007-07-05 13:07:40.969155045 +0200
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/lockdep.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -28,6 +29,9 @@ struct work_struct {
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
@@ -41,10 +45,23 @@ struct execute_work {
 	struct work_struct work;
 };
 
+#ifdef CONFIG_LOCKDEP
+/*
+ * NB: because we have to copy the lockdep_map, setting _key
+ * here is required, otherwise it could get initialised to the
+ * copy of the lockdep_map!
+ */
+#define __WORK_INIT_LOCKDEP_MAP(n, k) \
+	.lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
+#else
+#define __WORK_INIT_LOCKDEP_MAP(n, k)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
+	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
@@ -76,12 +93,24 @@ struct execute_work {
  * assignment of the work data initializer allows the compiler
  * to generate better code.
  */
+#ifdef CONFIG_LOCKDEP
 #define INIT_WORK(_work, _func)						\
 	do {								\
+		static struct lock_class_key __key;			\
+									\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
+		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
+#else
+#define INIT_WORK(_work, _func)						\
+	do {								\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
+		INIT_LIST_HEAD(&(_work)->entry);			\
+		PREPARE_WORK((_work), (_func));				\
+	} while (0)
+#endif
 
 #define INIT_DELAYED_WORK(_work, _func)				\
 	do {							\
--- linux-2.6-git.orig/kernel/workqueue.c	2007-07-05 13:01:55.728155045 +0200
+++ linux-2.6-git/kernel/workqueue.c	2007-07-05 13:03:40.882155045 +0200
@@ -254,6 +254,17 @@ static void run_workqueue(struct cpu_wor
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
 		work_func_t f = work->func;
+#ifdef CONFIG_LOCKDEP
+		/*
+		 * It is permissible to free the struct work_struct
+		 * from inside the function that is called from it;
+		 * we need to take this into account for lockdep too.
+		 * To avoid bogus "held lock freed" warnings as well
+		 * as problems when looking into work->lockdep_map,
+		 * make a copy and use that here.
+		 */
+		struct lockdep_map lockdep_map = work->lockdep_map;
+#endif
 
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
@@ -262,7 +273,9 @@ static void run_workqueue(struct cpu_wor
 		BUG_ON(get_wq_data(work) != cwq);
 		work_clear_pending(work);
 		lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+		lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 		f(work);
+		lock_release(&lockdep_map, 1, _THIS_IP_);
 		lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
@@ -454,6 +467,9 @@ static void wait_on_work(struct work_str
 
 	might_sleep();
 
+	lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&work->lockdep_map, 1, _THIS_IP_);
+
 	cwq = get_wq_data(work);
 	if (!cwq)
 		return;
--- linux-2.6-git.orig/include/linux/lockdep.h	2007-07-05 13:01:34.043155045 +0200
+++ linux-2.6-git/include/linux/lockdep.h	2007-07-05 13:03:40.901155045 +0200
@@ -223,6 +223,14 @@ extern void lockdep_init_map(struct lock
 				 (lock)->dep_map.key, sub)
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+
+/*
  * Acquire a lock.
  *
  * Values for "read":

-- 

-- 


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH (updated) 1/2] workqueue: debug flushing deadlocks with lockdep
  2007-07-17 12:53 ` [PATCH (resend) 1/2] workqueue: debug flushing deadlocks with lockdep Johannes Berg
@ 2007-07-18 17:28   ` Johannes Berg
  0 siblings, 0 replies; 4+ messages in thread
From: Johannes Berg @ 2007-07-18 17:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Oleg Nesterov, Ingo Molnar, Peter Zijlstra

In the following scenario:

code path 1:
  my_function() -> lock(L1); ...; flush_workqueue(); ...

code path 2:
  run_workqueue() -> my_work() -> ...; lock(L1); ...

you can get a deadlock when my_work() is queued or running
but my_function() has acquired L1 already.

This patch adds a pseudo-lock to each workqueue to make lockdep
warn about this scenario.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

---
This one applies against the current kernel; patch 2 still applies as-is.

 include/linux/workqueue.h |   20 +++++++++++++++++---
 kernel/workqueue.c        |   20 +++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

--- wireless-dev.orig/kernel/workqueue.c	2007-07-18 14:25:20.792900849 +0200
+++ wireless-dev/kernel/workqueue.c	2007-07-18 14:25:50.462900849 +0200
@@ -32,6 +32,7 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -257,7 +261,9 @@ static void run_workqueue(struct cpu_wor
 
 		BUG_ON(get_wq_data(work) != cwq);
 		work_clear_pending(work);
+		lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 		f(work);
+		lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -376,6 +382,8 @@ void fastcall flush_workqueue(struct wor
 	int cpu;
 
 	might_sleep();
+	lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&wq->lockdep_map, 1, _THIS_IP_);
 	for_each_cpu_mask(cpu, *cpu_map)
 		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
@@ -695,8 +703,10 @@ static void start_workqueue_thread(struc
 	}
 }
 
-struct workqueue_struct *__create_workqueue(const char *name,
-					    int singlethread, int freezeable)
+struct workqueue_struct *__create_workqueue_key(const char *name,
+						int singlethread,
+						int freezeable,
+						struct lock_class_key *key)
 {
 	struct workqueue_struct *wq;
 	struct cpu_workqueue_struct *cwq;
@@ -713,6 +723,7 @@ struct workqueue_struct *__create_workqu
 	}
 
 	wq->name = name;
+	lockdep_init_map(&wq->lockdep_map, name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
 	INIT_LIST_HEAD(&wq->list);
@@ -741,7 +752,7 @@ struct workqueue_struct *__create_workqu
 	}
 	return wq;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue);
+EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
@@ -752,6 +763,9 @@ static void cleanup_workqueue_thread(str
 	if (cwq->thread == NULL)
 		return;
 
+	lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
 	flush_cpu_workqueue(cwq);
 	/*
 	 * If the caller is CPU_DEAD and cwq->worklist was not empty,
--- wireless-dev.orig/include/linux/workqueue.h	2007-07-18 14:25:20.852900849 +0200
+++ wireless-dev/include/linux/workqueue.h	2007-07-18 14:25:50.462900849 +0200
@@ -118,9 +118,23 @@ struct execute_work {
 	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
-extern struct workqueue_struct *__create_workqueue(const char *name,
-						    int singlethread,
-						    int freezeable);
+extern struct workqueue_struct *
+__create_workqueue_key(const char *name, int singlethread,
+		       int freezeable, struct lock_class_key *key);
+
+#ifdef CONFIG_LOCKDEP
+#define __create_workqueue(name, singlethread, freezeable)	\
+({								\
+	static struct lock_class_key __key;			\
+								\
+	__create_workqueue_key((name), (singlethread),		\
+			       (freezeable), &__key);		\
+})
+#else
+#define __create_workqueue(name, singlethread, freezeable)	\
+	__create_workqueue_key((name), (singlethread), (freezeable), NULL)
+#endif
+
 #define create_workqueue(name) __create_workqueue((name), 0, 0)
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-07-18 17:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-17 12:53 [PATCH (resend) 0/2] workqueue lockup debugging Johannes Berg
2007-07-17 12:53 ` [PATCH (resend) 1/2] workqueue: debug flushing deadlocks with lockdep Johannes Berg
2007-07-18 17:28   ` [PATCH (updated) " Johannes Berg
2007-07-17 12:53 ` [PATCH (resend) 2/2] workqueue: debug work related " Johannes Berg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).