All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PM: Acquire device locks on suspend
@ 2008-01-05 18:36 Rafael J. Wysocki
  0 siblings, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 18:36 UTC (permalink / raw)
  To: Greg KH, Andrew Morton; +Cc: LKML, ACPI Devel Maling List, Ingo Molnar, pm list

Greg, Andrew,

The appended patch is a replacement for
gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
suspend and hibernation on some systems.

Please consider applying it.

Thanks,
Rafael

---
From: Alan Stern <stern@rowland.harvard.edu>, Rafael J. Wysocki <rjw@sisk.pl>

This patch reorganizes the way suspend and resume notifications are
sent to drivers.  The major changes are that now the PM core acquires
every device semaphore before calling the methods, and calls to
device_add() during suspends will fail.

It also provides a way to safely remove a suspended device with the help of
the PM core, by using the destroy_suspended_device() helper function introduced
specifically for this purpose, and updates two drivers (msr and cpuid) that need
to do that.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   67 +++++-
 drivers/base/power/main.c  |  452 ++++++++++++++++++++++++++-------------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 352 insertions(+), 199 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
-		goto Error;
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
+		goto Done;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -905,6 +915,13 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	if (pm_sleep_lock()) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+	} else {
+		pm_sleep_unlock();
+	}
+
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))
@@ -1156,14 +1173,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1190,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
+
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
 
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call causes the PM core to release and unregister a suspended device
+ * created with a call to device_create() (devices cannot be unregistered
+ * directly while suspended, since the PM core holds their semaphores at that
+ * time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_destroy_suspended(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_destroy_suspended(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,17 +24,38 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
+static DECLARE_RWSEM(pm_sleep_rwsem);
+
 int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
 
@@ -53,29 +74,124 @@ void device_pm_remove(struct device *dev
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
+
+	/* Don't remove a device while the PM core has it locked for suspend */
+	down(&dev->sem);
 	mutex_lock(&dpm_list_mtx);
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+void device_pm_destroy_suspended(struct device *dev)
+{
+	pr_debug("PM: Removing suspended device %s:%s\n",
+		 dev->bus ? dev->bus->name : "No Bus",
+		 kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_del_init(&dev->power.entry);
+	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+	device_unregister(dev);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Interrupts must be disabled when calling this.
+ */
+static void dpm_power_up(void)
+{
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		resume_device_early(dev);
+		list_move_tail(entry, &dpm_off);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +208,68 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
 	TRACE_RESUME(error);
 	return error;
 }
 
-
-static int resume_device_early(struct device * dev)
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
+ */
+static void dpm_resume(void)
 {
-	int error = 0;
+	while(!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
+		resume_device(dev);
+		list_move_tail(entry, &dpm_locked);
 	}
-	TRACE_RESUME(error);
-	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	unlock_all_devices - Release each device's semaphore
+ *
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-static void dpm_resume(void)
+static void unlock_all_devices(void)
 {
 	mutex_lock(&dpm_list_mtx);
-	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
-
-		mutex_unlock(&dpm_list_mtx);
-		resume_device(dev);
-		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
-	}
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
  *	device_resume - Restore state of each device in system.
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
 void device_resume(void)
 {
 	might_sleep();
-	mutex_lock(&dpm_mtx);
 	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	unlock_all_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
 EXPORT_SYMBOL_GPL(device_resume);
 
 
-/**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
- *
- *	Interrupts must be disabled when calling this.
- */
-
-static void dpm_power_up(void)
-{
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
-	}
-}
-
-
-/**
- *	device_power_up - Turn on all devices that need special attention.
- *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
- */
-
-void device_power_up(void)
-{
-	sysdev_resume();
-	dpm_power_up();
-}
-
-EXPORT_SYMBOL_GPL(device_power_up);
-
-
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +280,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +289,69 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
  */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
+
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
 
-static int suspend_device(struct device * dev, pm_message_t state)
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
+
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			break;
+		}
+		list_move(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +374,95 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
 	return error;
 }
 
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
+/**
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
+ *
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
+ *
+ *	(For historical reasons, if it returns -EAGAIN, that used to mean
+ *	that the device would be called again with interrupts disabled.
+ *	These days, we use the "suspend_late()" callback for that, so we
+ *	print a warning and consider it an error).
  */
-static int suspend_device_late(struct device *dev, pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not suspend device %s: "
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			break;
+		}
+ 		list_move(&dev->power.entry, &dpm_off);
 	}
+
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
- *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
- *
- *	(For historical reasons, if it returns -EAGAIN, that used to mean
- *	that the device would be called again with interrupts disabled.
- *	These days, we use the "suspend_late()" callback for that, so we
- *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
+ *	lock_all_devices - Acquire every device's semaphore
  *
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_suspend(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
-
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
-
-		error = suspend_device(dev, state);
-
+		down(&dev->sem);
 		mutex_lock(&dpm_list_mtx);
 
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
-			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
-
-	mutex_unlock(&dpm_mtx);
-	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	device_suspend - Save state and stop all devices in system.
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
  */
-
-int device_power_down(pm_message_t state)
+int device_suspend(pm_message_t state)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
-
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
-	}
+	int error;
 
-	error = sysdev_suspend(state);
- Done:
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
  2008-01-10 17:04                                                           ` Alan Stern
@ 2008-01-10 17:04                                                           ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-10 17:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:

> > > > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > > > extra paragraph warning that the routine should never be called except 
> > > > > > within the scope of a system sleep transition.  In practice this means 
> > > > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > > > method.
> > > > > 
> > > > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > > > 
> > > > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > > > notification.  Isn't it true that this notification is issued only as
> > > > part of a system sleep transition?
> > > 
> > > Yes, it is.
> > 
> > So it counts as being indirectly invoked by a resume method.
> 
> Rather, by the resume core.  Technically, it's invoked by
> enable_nonboot_cpus(), which is not a resume method literally.

Okay, then the routine should only be called directly or indirectly 
from a suspend or resume method or from the suspend or resume core.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
@ 2008-01-10 17:04                                                           ` Alan Stern
  2008-01-10 17:04                                                           ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-10 17:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:

> > > > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > > > extra paragraph warning that the routine should never be called except 
> > > > > > within the scope of a system sleep transition.  In practice this means 
> > > > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > > > method.
> > > > > 
> > > > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > > > 
> > > > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > > > notification.  Isn't it true that this notification is issued only as
> > > > part of a system sleep transition?
> > > 
> > > Yes, it is.
> > 
> > So it counts as being indirectly invoked by a resume method.
> 
> Rather, by the resume core.  Technically, it's invoked by
> enable_nonboot_cpus(), which is not a resume method literally.

Okay, then the routine should only be called directly or indirectly 
from a suspend or resume method or from the suspend or resume core.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-10 15:35                                                       ` Alan Stern
@ 2008-01-10 16:59                                                         ` Rafael J. Wysocki
  2008-01-10 17:04                                                           ` Alan Stern
  2008-01-10 17:04                                                           ` Alan Stern
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-10 16:59 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Thursday, 10 of January 2008, Alan Stern wrote:
> On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Wednesday, 9 of January 2008, Alan Stern wrote:
> > > On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > > > the list_move_tail() comes before the resume_device().  It's the same
> > > > > as in dpm_power_up().
> > > > 
> > > > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > > > with dpm_resume() by another thread and this might corrupt the list without
> > > > the locking.
> > > 
> > > Any thread doing that would be in violation of the restrictions you're 
> > > going to add to the kerneldoc for destroy_suspended_device().
> > > 
> > > However the overhead for the locking isn't critical.  There won't be
> > > any contention (if everything is working right) and it isn't a hot path
> > > anyway.  So you can leave the extra locking in if you want.  But then
> > > you should put it in all the routines where the lists get manipulated,
> > > not just some of them.  That is: device_power_down(), dpm_power_up(),
> > > and even unregister_dropped_devices().
> > 
> > Except for those run on one CPU with interrupts disabled, I think.
> 
> Not unregister_dropped_devices()!

Sure, it will need locking around the check in while().

> > > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > > extra paragraph warning that the routine should never be called except 
> > > > > within the scope of a system sleep transition.  In practice this means 
> > > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > > method.
> > > > 
> > > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > > 
> > > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > > notification.  Isn't it true that this notification is issued only as
> > > part of a system sleep transition?
> > 
> > Yes, it is.
> 
> So it counts as being indirectly invoked by a resume method.

Rather, by the resume core.  Technically, it's invoked by
enable_nonboot_cpus(), which is not a resume method literally.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-10 15:35                                                       ` Alan Stern
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
@ 2008-01-10 16:59                                                         ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-10 16:59 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Thursday, 10 of January 2008, Alan Stern wrote:
> On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Wednesday, 9 of January 2008, Alan Stern wrote:
> > > On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > > > the list_move_tail() comes before the resume_device().  It's the same
> > > > > as in dpm_power_up().
> > > > 
> > > > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > > > with dpm_resume() by another thread and this might corrupt the list without
> > > > the locking.
> > > 
> > > Any thread doing that would be in violation of the restrictions you're 
> > > going to add to the kerneldoc for destroy_suspended_device().
> > > 
> > > However the overhead for the locking isn't critical.  There won't be
> > > any contention (if everything is working right) and it isn't a hot path
> > > anyway.  So you can leave the extra locking in if you want.  But then
> > > you should put it in all the routines where the lists get manipulated,
> > > not just some of them.  That is: device_power_down(), dpm_power_up(),
> > > and even unregister_dropped_devices().
> > 
> > Except for those run on one CPU with interrupts disabled, I think.
> 
> Not unregister_dropped_devices()!

Sure, it will need locking around the check in while().

> > > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > > extra paragraph warning that the routine should never be called except 
> > > > > within the scope of a system sleep transition.  In practice this means 
> > > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > > method.
> > > > 
> > > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > > 
> > > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > > notification.  Isn't it true that this notification is issued only as
> > > part of a system sleep transition?
> > 
> > Yes, it is.
> 
> So it counts as being indirectly invoked by a resume method.

Rather, by the resume core.  Technically, it's invoked by
enable_nonboot_cpus(), which is not a resume method literally.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
  2008-01-10 15:35                                                       ` Alan Stern
@ 2008-01-10 15:35                                                       ` Alan Stern
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
  2008-01-10 16:59                                                         ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-10 15:35 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:

> On Wednesday, 9 of January 2008, Alan Stern wrote:
> > On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > > the list_move_tail() comes before the resume_device().  It's the same
> > > > as in dpm_power_up().
> > > 
> > > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > > with dpm_resume() by another thread and this might corrupt the list without
> > > the locking.
> > 
> > Any thread doing that would be in violation of the restrictions you're 
> > going to add to the kerneldoc for destroy_suspended_device().
> > 
> > However the overhead for the locking isn't critical.  There won't be
> > any contention (if everything is working right) and it isn't a hot path
> > anyway.  So you can leave the extra locking in if you want.  But then
> > you should put it in all the routines where the lists get manipulated,
> > not just some of them.  That is: device_power_down(), dpm_power_up(),
> > and even unregister_dropped_devices().
> 
> Except for those run on one CPU with interrupts disabled, I think.

Not unregister_dropped_devices()!

> > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > extra paragraph warning that the routine should never be called except 
> > > > within the scope of a system sleep transition.  In practice this means 
> > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > method.
> > > 
> > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > 
> > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > notification.  Isn't it true that this notification is issued only as
> > part of a system sleep transition?
> 
> Yes, it is.

So it counts as being indirectly invoked by a resume method.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
@ 2008-01-10 15:35                                                       ` Alan Stern
  2008-01-10 15:35                                                       ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-10 15:35 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Thu, 10 Jan 2008, Rafael J. Wysocki wrote:

> On Wednesday, 9 of January 2008, Alan Stern wrote:
> > On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > > the list_move_tail() comes before the resume_device().  It's the same
> > > > as in dpm_power_up().
> > > 
> > > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > > with dpm_resume() by another thread and this might corrupt the list without
> > > the locking.
> > 
> > Any thread doing that would be in violation of the restrictions you're 
> > going to add to the kerneldoc for destroy_suspended_device().
> > 
> > However the overhead for the locking isn't critical.  There won't be
> > any contention (if everything is working right) and it isn't a hot path
> > anyway.  So you can leave the extra locking in if you want.  But then
> > you should put it in all the routines where the lists get manipulated,
> > not just some of them.  That is: device_power_down(), dpm_power_up(),
> > and even unregister_dropped_devices().
> 
> Except for those run on one CPU with interrupts disabled, I think.

Not unregister_dropped_devices()!

> > > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > > extra paragraph warning that the routine should never be called except 
> > > > within the scope of a system sleep transition.  In practice this means 
> > > > it has to be directly or indirectly invoked by a suspend or resume 
> > > > method.
> > > 
> > > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> > 
> > In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> > notification.  Isn't it true that this notification is issued only as
> > part of a system sleep transition?
> 
> Yes, it is.

So it counts as being indirectly invoked by a resume method.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 22:46                                                   ` Alan Stern
@ 2008-01-09 23:29                                                     ` Rafael J. Wysocki
  2008-01-10 15:35                                                       ` Alan Stern
  2008-01-10 15:35                                                       ` Alan Stern
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-09 23:29 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Wednesday, 9 of January 2008, Alan Stern wrote:
> On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > the list_move_tail() comes before the resume_device().  It's the same
> > > as in dpm_power_up().
> > 
> > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > with dpm_resume() by another thread and this might corrupt the list without
> > the locking.
> 
> Any thread doing that would be in violation of the restrictions you're 
> going to add to the kerneldoc for destroy_suspended_device().
> 
> However the overhead for the locking isn't critical.  There won't be
> any contention (if everything is working right) and it isn't a hot path
> anyway.  So you can leave the extra locking in if you want.  But then
> you should put it in all the routines where the lists get manipulated,
> not just some of them.  That is: device_power_down(), dpm_power_up(),
> and even unregister_dropped_devices().

Except for those run on one CPU with interrupts disabled, I think.

> > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > extra paragraph warning that the routine should never be called except 
> > > within the scope of a system sleep transition.  In practice this means 
> > > it has to be directly or indirectly invoked by a suspend or resume 
> > > method.
> > 
> > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> 
> In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> notification.  Isn't it true that this notification is issued only as
> part of a system sleep transition?

Yes, it is.

> We wouldn't want to allow destroy_suspended_device() to be called when an
> arbitrary CPU hotplug notification occurs.

Of course.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 22:46                                                   ` Alan Stern
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
@ 2008-01-09 23:29                                                     ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-09 23:29 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Wednesday, 9 of January 2008, Alan Stern wrote:
> On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > > the list_move_tail() comes before the resume_device().  It's the same
> > > as in dpm_power_up().
> > 
> > Still, device_pm_schedule_removal() can (in theory) be called concurrently
> > with dpm_resume() by another thread and this might corrupt the list without
> > the locking.
> 
> Any thread doing that would be in violation of the restrictions you're 
> going to add to the kerneldoc for destroy_suspended_device().
> 
> However the overhead for the locking isn't critical.  There won't be
> any contention (if everything is working right) and it isn't a hot path
> anyway.  So you can leave the extra locking in if you want.  But then
> you should put it in all the routines where the lists get manipulated,
> not just some of them.  That is: device_power_down(), dpm_power_up(),
> and even unregister_dropped_devices().

Except for those run on one CPU with interrupts disabled, I think.

> > > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > > extra paragraph warning that the routine should never be called except 
> > > within the scope of a system sleep transition.  In practice this means 
> > > it has to be directly or indirectly invoked by a suspend or resume 
> > > method.
> > 
> > Or by a CPU hotplug notifier (that will be the majority of cases, IMO).
> 
> In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
> notification.  Isn't it true that this notification is issued only as
> part of a system sleep transition?

Yes, it is.

> We wouldn't want to allow destroy_suspended_device() to be called when an
> arbitrary CPU hotplug notification occurs.

Of course.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
  2008-01-09 22:46                                                   ` Alan Stern
@ 2008-01-09 22:46                                                   ` Alan Stern
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
  2008-01-09 23:29                                                     ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-09 22:46 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:

> > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > the list_move_tail() comes before the resume_device().  It's the same
> > as in dpm_power_up().
> 
> Still, device_pm_schedule_removal() can (in theory) be called concurrently
> with dpm_resume() by another thread and this might corrupt the list without
> the locking.

Any thread doing that would be in violation of the restrictions you're 
going to add to the kerneldoc for destroy_suspended_device().

However the overhead for the locking isn't critical.  There won't be
any contention (if everything is working right) and it isn't a hot path
anyway.  So you can leave the extra locking in if you want.  But then
you should put it in all the routines where the lists get manipulated,
not just some of them.  That is: device_power_down(), dpm_power_up(),
and even unregister_dropped_devices().

> > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > extra paragraph warning that the routine should never be called except 
> > within the scope of a system sleep transition.  In practice this means 
> > it has to be directly or indirectly invoked by a suspend or resume 
> > method.
> 
> Or by a CPU hotplug notifier (that will be the majority of cases, IMO).

In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
notification.  Isn't it true that this notification is issued only as
part of a system sleep transition?  We wouldn't want to allow
destroy_suspended_device() to be called when an arbitrary CPU hotplug
notification occurs.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
@ 2008-01-09 22:46                                                   ` Alan Stern
  2008-01-09 22:46                                                   ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-09 22:46 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Wed, 9 Jan 2008, Rafael J. Wysocki wrote:

> > In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> > the list_move_tail() comes before the resume_device().  It's the same
> > as in dpm_power_up().
> 
> Still, device_pm_schedule_removal() can (in theory) be called concurrently
> with dpm_resume() by another thread and this might corrupt the list without
> the locking.

Any thread doing that would be in violation of the restrictions you're 
going to add to the kerneldoc for destroy_suspended_device().

However the overhead for the locking isn't critical.  There won't be
any contention (if everything is working right) and it isn't a hot path
anyway.  So you can leave the extra locking in if you want.  But then
you should put it in all the routines where the lists get manipulated,
not just some of them.  That is: device_power_down(), dpm_power_up(),
and even unregister_dropped_devices().

> > Also, the kerneldoc for destroy_suspended_device() should contain an 
> > extra paragraph warning that the routine should never be called except 
> > within the scope of a system sleep transition.  In practice this means 
> > it has to be directly or indirectly invoked by a suspend or resume 
> > method.
> 
> Or by a CPU hotplug notifier (that will be the majority of cases, IMO).

In your patch the call is made in response to a CPU_UP_CANCELED_FROZEN
notification.  Isn't it true that this notification is issued only as
part of a system sleep transition?  We wouldn't want to allow
destroy_suspended_device() to be called when an arbitrary CPU hotplug
notification occurs.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 21:01                                               ` Alan Stern
@ 2008-01-09 22:14                                                 ` Rafael J. Wysocki
  2008-01-09 22:46                                                   ` Alan Stern
  2008-01-09 22:46                                                   ` Alan Stern
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-09 22:14 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Wednesday, 9 of January 2008, Alan Stern wrote:
> On Tue, 8 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Appended is what I managed to put together today.
> > 
> > It probably still has some problems, but I'm not seeing them right now (too
> > tired).  At least, it doesn't break my system. ;-)
> > 
> > Please review.
> 
> Okay, this seems to be better.  I like the way the complicated tests 
> are all localized in power/main.c.
> 
> In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> the list_move_tail() comes before the resume_device().  It's the same
> as in dpm_power_up().

Still, device_pm_schedule_removal() can (in theory) be called concurrently
with dpm_resume() by another thread and this might corrupt the list without
the locking.

> The same is true for dpm_suspend().  Once all the devices have been 
> locked, there shouldn't be any other tasks accessing the dpm lists.  
> Hence there should be no need to protect the list.

Except against theoretical races with device_pm_schedule_removal().

> Which reminds me, the kerneldoc for device_pm_schedule_removal() is 
> inaccurate.  The routine always just moves the device to dpm_destroy 
> list for later processing.

Correct.

> Also, the kerneldoc for destroy_suspended_device() should contain an 
> extra paragraph warning that the routine should never be called except 
> within the scope of a system sleep transition.  In practice this means 
> it has to be directly or indirectly invoked by a suspend or resume 
> method.

Or by a CPU hotplug notifier (that will be the majority of cases, IMO).

> It looks good.

Thanks for the review.

I'll fix the comments and repost the patch from scratch for merging in a
separate thread.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-09 21:01                                               ` Alan Stern
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
@ 2008-01-09 22:14                                                 ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-09 22:14 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Wednesday, 9 of January 2008, Alan Stern wrote:
> On Tue, 8 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Appended is what I managed to put together today.
> > 
> > It probably still has some problems, but I'm not seeing them right now (too
> > tired).  At least, it doesn't break my system. ;-)
> > 
> > Please review.
> 
> Okay, this seems to be better.  I like the way the complicated tests 
> are all localized in power/main.c.
> 
> In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
> the list_move_tail() comes before the resume_device().  It's the same
> as in dpm_power_up().

Still, device_pm_schedule_removal() can (in theory) be called concurrently
with dpm_resume() by another thread and this might corrupt the list without
the locking.

> The same is true for dpm_suspend().  Once all the devices have been 
> locked, there shouldn't be any other tasks accessing the dpm lists.  
> Hence there should be no need to protect the list.

Except against theoretical races with device_pm_schedule_removal().

> Which reminds me, the kerneldoc for device_pm_schedule_removal() is 
> inaccurate.  The routine always just moves the device to dpm_destroy 
> list for later processing.

Correct.

> Also, the kerneldoc for destroy_suspended_device() should contain an 
> extra paragraph warning that the routine should never be called except 
> within the scope of a system sleep transition.  In practice this means 
> it has to be directly or indirectly invoked by a suspend or resume 
> method.

Or by a CPU hotplug notifier (that will be the majority of cases, IMO).

> It looks good.

Thanks for the review.

I'll fix the comments and repost the patch from scratch for merging in a
separate thread.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-08  0:25                                             ` Rafael J. Wysocki
@ 2008-01-09 21:01                                               ` Alan Stern
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
  2008-01-09 22:14                                                 ` Rafael J. Wysocki
  2008-01-09 21:01                                               ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-09 21:01 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Tue, 8 Jan 2008, Rafael J. Wysocki wrote:

> Appended is what I managed to put together today.
> 
> It probably still has some problems, but I'm not seeing them right now (too
> tired).  At least, it doesn't break my system. ;-)
> 
> Please review.

Okay, this seems to be better.  I like the way the complicated tests 
are all localized in power/main.c.

In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
the list_move_tail() comes before the resume_device().  It's the same
as in dpm_power_up().

The same is true for dpm_suspend().  Once all the devices have been 
locked, there shouldn't be any other tasks accessing the dpm lists.  
Hence there should be no need to protect the list.

Which reminds me, the kerneldoc for device_pm_schedule_removal() is 
inaccurate.  The routine always just moves the device to dpm_destroy 
list for later processing.

Also, the kerneldoc for destroy_suspended_device() should contain an 
extra paragraph warning that the routine should never be called except 
within the scope of a system sleep transition.  In practice this means 
it has to be directly or indirectly invoked by a suspend or resume 
method.

It looks good.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-08  0:25                                             ` Rafael J. Wysocki
  2008-01-09 21:01                                               ` Alan Stern
@ 2008-01-09 21:01                                               ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-09 21:01 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Tue, 8 Jan 2008, Rafael J. Wysocki wrote:

> Appended is what I managed to put together today.
> 
> It probably still has some problems, but I'm not seeing them right now (too
> tired).  At least, it doesn't break my system. ;-)
> 
> Please review.

Okay, this seems to be better.  I like the way the complicated tests 
are all localized in power/main.c.

In dpm_resume() you shouldn't need to use dpm_list_mtx at all, because
the list_move_tail() comes before the resume_device().  It's the same
as in dpm_power_up().

The same is true for dpm_suspend().  Once all the devices have been 
locked, there shouldn't be any other tasks accessing the dpm lists.  
Hence there should be no need to protect the list.

Which reminds me, the kerneldoc for device_pm_schedule_removal() is 
inaccurate.  The routine always just moves the device to dpm_destroy 
list for later processing.

Also, the kerneldoc for destroy_suspended_device() should contain an 
extra paragraph warning that the routine should never be called except 
within the scope of a system sleep transition.  In practice this means 
it has to be directly or indirectly invoked by a suspend or resume 
method.

It looks good.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 21:32                                           ` Alan Stern
@ 2008-01-08  0:25                                             ` Rafael J. Wysocki
  2008-01-09 21:01                                               ` Alan Stern
  2008-01-09 21:01                                               ` Alan Stern
  2008-01-08  0:25                                             ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-08  0:25 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
[--snip--]
> 
> > Okay, well, now I'm leaning towards the asynchronous approach.
> > 
> > I'll prepare a new patch and send it later today.
> 
> Okay.

Appended is what I managed to put together today.

It probably still has some problems, but I'm not seeing them right now (too
tired).  At least, it doesn't break my system. ;-)

Please review.

Thanks,
Rafael

---
From: Alan Stern <stern@rowland.harvard.edu>, Rafael J. Wysocki <rjw@sisk.pl>

This patch reorganizes the way suspend and resume notifications are
sent to drivers.  The major changes are that now the PM core acquires
every device semaphore before calling the methods, and calls to
device_add() during suspends will fail, while calls to device_del()
during suspends will block.

It also provides a way to safely remove a suspended device with the
help of the PM core, by using the device_pm_schedule_removal() callback
introduced specifically for this purpose, and updates two drivers (msr
and cpuid) that need to use it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   60 ++++-
 drivers/base/power/main.c  |  504 +++++++++++++++++++++++++++++----------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 408 insertions(+), 188 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Suspicious %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
 		goto Error;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -905,6 +915,7 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	device_pm_remove(dev);
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))
@@ -981,7 +992,6 @@ void device_del(struct device * dev)
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->bus_notifier,
 					     BUS_NOTIFY_DEL_DEVICE, dev);
-	device_pm_remove(dev);
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
 	if (parent)
@@ -1156,14 +1166,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1183,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
 
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call notifies the PM core of the necessity to unregister a suspended
+ * device created with a call to device_create() (devices cannot be
+ * unregistered directly while suspended, since the PM core holds their
+ * semaphores at that time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_schedule_removal(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_schedule_removal(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,20 +24,45 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
+static LIST_HEAD(dpm_destroy);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
-int (*platform_enable_wakeup)(struct device *dev, int is_on);
+static DECLARE_RWSEM(pm_sleep_rwsem);
 
+int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
+/**
+ *	device_pm_add - add a device to the list of active devices
+ *	@dev:	Device to be added to the list
+ */
 void device_pm_add(struct device *dev)
 {
 	pr_debug("PM: Adding info for %s:%s\n",
@@ -48,8 +73,36 @@ void device_pm_add(struct device *dev)
 	mutex_unlock(&dpm_list_mtx);
 }
 
+/**
+ *	device_pm_remove - remove a device from the list of active devices
+ *	@dev:	Device to be removed from the list
+ *
+ *	This function also removes the device's PM-related sysfs attributes.
+ */
 void device_pm_remove(struct device *dev)
 {
+	/*
+	 * If this function is called during a suspend, it will be blocked,
+	 * because we're holding the device's semaphore at that time, which may
+	 * lead to a deadlock. In that case we want to print a warning.
+	 * However, it may also be called by unregister_dropped_devices() with
+	 * the device's semaphore released, in which case the warning should
+	 * not be printed.
+	 */
+	if (down_trylock(&dev->sem)) {
+		if (down_read_trylock(&pm_sleep_rwsem)) {
+			/* No suspend in progress, wait on dev->sem */
+			down(&dev->sem);
+			up_read(&pm_sleep_rwsem);
+		} else {
+			/* Suspend in progress, we may deadlock */
+			dev_warn(dev, "Suspicious %s during suspend\n",
+				__FUNCTION__);
+			dump_stack();
+			/* The user has been warned ... */
+			down(&dev->sem);
+		}
+	}
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
@@ -57,25 +110,126 @@ void device_pm_remove(struct device *dev
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+/**
+ *	device_pm_schedule_removal - schedule the removal of a suspended device
+ *	@dev:	Device to destroy
+ *
+ *	Move the device to the dpm_destroy list.  The devices on that list
+ *	are unlocked and unregistered by unregister_dropped_devices(), which
+ *	device_resume() calls after resuming and unlocking all of the other
+ *	devices.
+ */
+void device_pm_schedule_removal(struct device *dev)
+{
+	pr_debug("PM: Preparing for removal: %s:%s\n",
+		dev->bus ? dev->bus->name : "No Bus",
+		kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_move_tail(&dev->power.entry, &dpm_destroy);
+	mutex_unlock(&dpm_list_mtx);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+static void dpm_power_up(void)
+{
+
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		list_move_tail(entry, &dpm_off);
+		resume_device_early(dev);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +246,90 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
-	TRACE_RESUME(error);
-	return error;
-}
-
-
-static int resume_device_early(struct device * dev)
-{
-	int error = 0;
-
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
-	}
 	TRACE_RESUME(error);
 	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
  */
 static void dpm_resume(void)
 {
 	mutex_lock(&dpm_list_mtx);
 	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
+		list_move_tail(entry, &dpm_locked);
 		mutex_unlock(&dpm_list_mtx);
 		resume_device(dev);
 		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
- *	device_resume - Restore state of each device in system.
+ *	unlock_all_devices - Release each device's semaphore
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-
-void device_resume(void)
+static void unlock_all_devices(void)
 {
-	might_sleep();
-	mutex_lock(&dpm_mtx);
-	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	mutex_lock(&dpm_list_mtx);
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
+	mutex_unlock(&dpm_list_mtx);
 }
 
-EXPORT_SYMBOL_GPL(device_resume);
-
-
 /**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
+ *	unregister_dropped_devices - Unregister devices scheduled for removal
  *
- *	Interrupts must be disabled when calling this.
+ *	Unregister all devices on the dpm_destroy list.
  */
-
-static void dpm_power_up(void)
+static void unregister_dropped_devices(void)
 {
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
+	while (!list_empty(&dpm_destroy)) {
+		struct list_head *entry = dpm_destroy.next;
+		struct device *dev = to_device(entry);
+
+		up(&dev->sem);
+		/* This also removes the device from the list */
+		device_unregister(dev);
 	}
 }
 
-
 /**
- *	device_power_up - Turn on all devices that need special attention.
+ *	device_resume - Restore state of each device in system.
  *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
-void device_power_up(void)
+void device_resume(void)
 {
-	sysdev_resume();
-	dpm_power_up();
+	might_sleep();
+	dpm_resume();
+	unlock_all_devices();
+	unregister_dropped_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
-EXPORT_SYMBOL_GPL(device_power_up);
+EXPORT_SYMBOL_GPL(device_resume);
 
 
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +340,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +349,73 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
+ */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
+
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
+
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
  */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
 
-static int suspend_device(struct device * dev, pm_message_t state)
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		list_del_init(&dev->power.entry);
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			if (list_empty(&dev->power.entry))
+				list_add(&dev->power.entry, &dpm_off);
+			break;
+		}
+		if (list_empty(&dev->power.entry))
+			list_add(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +438,105 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
-	return error;
-}
-
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
- */
-static int suspend_device_late(struct device *dev, pm_message_t state)
-{
-	int error = 0;
-
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
-	}
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
  *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
  *
  *	(For historical reasons, if it returns -EAGAIN, that used to mean
  *	that the device would be called again with interrupts disabled.
  *	These days, we use the "suspend_late()" callback for that, so we
  *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
- *
  */
-
-int device_suspend(pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
 
-		get_device(dev);
+		list_del_init(&dev->power.entry);
 		mutex_unlock(&dpm_list_mtx);
-
 		error = suspend_device(dev, state);
-
-		mutex_lock(&dpm_list_mtx);
-
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
+		if (error) {
 			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
-		put_device(dev);
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			mutex_lock(&dpm_list_mtx);
+			if (list_empty(&dev->power.entry))
+				list_add(&dev->power.entry, &dpm_locked);
+			mutex_unlock(&dpm_list_mtx);
+			break;
+		}
+		mutex_lock(&dpm_list_mtx);
+		if (list_empty(&dev->power.entry))
+ 			list_add(&dev->power.entry, &dpm_off);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
 
-	mutex_unlock(&dpm_mtx);
 	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	lock_all_devices - Acquire every device's semaphore
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_power_down(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
+	mutex_lock(&dpm_list_mtx);
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+		down(&dev->sem);
+		mutex_lock(&dpm_list_mtx);
 
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
+		put_device(dev);
 	}
+	mutex_unlock(&dpm_list_mtx);
+}
 
-	error = sysdev_suspend(state);
- Done:
+/**
+ *	device_suspend - Save state and stop all devices in system.
+ *
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
+ */
+int device_suspend(pm_message_t state)
+{
+	int error;
+
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 21:32                                           ` Alan Stern
  2008-01-08  0:25                                             ` Rafael J. Wysocki
@ 2008-01-08  0:25                                             ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-08  0:25 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
[--snip--]
> 
> > Okay, well, now I'm leaning towards the asynchronous approach.
> > 
> > I'll prepare a new patch and send it later today.
> 
> Okay.

Appended is what I managed to put together today.

It probably still has some problems, but I'm not seeing them right now (too
tired).  At least, it doesn't break my system. ;-)

Please review.

Thanks,
Rafael

---
From: Alan Stern <stern@rowland.harvard.edu>, Rafael J. Wysocki <rjw@sisk.pl>

This patch reorganizes the way suspend and resume notifications are
sent to drivers.  The major changes are that now the PM core acquires
every device semaphore before calling the methods, and calls to
device_add() during suspends will fail, while calls to device_del()
during suspends will block.

It also provides a way to safely remove a suspended device with the
help of the PM core, by using the device_pm_schedule_removal() callback
introduced specifically for this purpose, and updates two drivers (msr
and cpuid) that need to use it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   60 ++++-
 drivers/base/power/main.c  |  504 +++++++++++++++++++++++++++++----------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 408 insertions(+), 188 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Suspicious %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
 		goto Error;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -905,6 +915,7 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	device_pm_remove(dev);
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))
@@ -981,7 +992,6 @@ void device_del(struct device * dev)
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->bus_notifier,
 					     BUS_NOTIFY_DEL_DEVICE, dev);
-	device_pm_remove(dev);
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
 	if (parent)
@@ -1156,14 +1166,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1183,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
 
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call notifies the PM core of the necessity to unregister a suspended
+ * device created with a call to device_create() (devices cannot be
+ * unregistered directly while suspended, since the PM core holds their
+ * semaphores at that time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_schedule_removal(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_schedule_removal(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,20 +24,45 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
+static LIST_HEAD(dpm_destroy);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
-int (*platform_enable_wakeup)(struct device *dev, int is_on);
+static DECLARE_RWSEM(pm_sleep_rwsem);
 
+int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
+/**
+ *	device_pm_add - add a device to the list of active devices
+ *	@dev:	Device to be added to the list
+ */
 void device_pm_add(struct device *dev)
 {
 	pr_debug("PM: Adding info for %s:%s\n",
@@ -48,8 +73,36 @@ void device_pm_add(struct device *dev)
 	mutex_unlock(&dpm_list_mtx);
 }
 
+/**
+ *	device_pm_remove - remove a device from the list of active devices
+ *	@dev:	Device to be removed from the list
+ *
+ *	This function also removes the device's PM-related sysfs attributes.
+ */
 void device_pm_remove(struct device *dev)
 {
+	/*
+	 * If this function is called during a suspend, it will be blocked,
+	 * because we're holding the device's semaphore at that time, which may
+	 * lead to a deadlock. In that case we want to print a warning.
+	 * However, it may also be called by unregister_dropped_devices() with
+	 * the device's semaphore released, in which case the warning should
+	 * not be printed.
+	 */
+	if (down_trylock(&dev->sem)) {
+		if (down_read_trylock(&pm_sleep_rwsem)) {
+			/* No suspend in progress, wait on dev->sem */
+			down(&dev->sem);
+			up_read(&pm_sleep_rwsem);
+		} else {
+			/* Suspend in progress, we may deadlock */
+			dev_warn(dev, "Suspicious %s during suspend\n",
+				__FUNCTION__);
+			dump_stack();
+			/* The user has been warned ... */
+			down(&dev->sem);
+		}
+	}
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
@@ -57,25 +110,126 @@ void device_pm_remove(struct device *dev
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+/**
+ *	device_pm_schedule_removal - schedule the removal of a suspended device
+ *	@dev:	Device to destroy
+ *
+ *	Move the device to the dpm_destroy list, so that the PM core can
+ *	unregister it later.  A suspended device cannot be unregistered
+ *	directly, because the PM core holds the device's semaphore at
+ *	that time.
+ */
+void device_pm_schedule_removal(struct device *dev)
+{
+	pr_debug("PM: Preparing for removal: %s:%s\n",
+		dev->bus ? dev->bus->name : "No Bus",
+		kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_move_tail(&dev->power.entry, &dpm_destroy);
+	mutex_unlock(&dpm_list_mtx);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+static void dpm_power_up(void)
+{
+
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		list_move_tail(entry, &dpm_off);
+		resume_device_early(dev);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +246,90 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
-	TRACE_RESUME(error);
-	return error;
-}
-
-
-static int resume_device_early(struct device * dev)
-{
-	int error = 0;
-
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
-	}
 	TRACE_RESUME(error);
 	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
  */
 static void dpm_resume(void)
 {
 	mutex_lock(&dpm_list_mtx);
 	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
+		list_move_tail(entry, &dpm_locked);
 		mutex_unlock(&dpm_list_mtx);
 		resume_device(dev);
 		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
- *	device_resume - Restore state of each device in system.
+ *	unlock_all_devices - Release each device's semaphore
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-
-void device_resume(void)
+static void unlock_all_devices(void)
 {
-	might_sleep();
-	mutex_lock(&dpm_mtx);
-	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	mutex_lock(&dpm_list_mtx);
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
+	mutex_unlock(&dpm_list_mtx);
 }
 
-EXPORT_SYMBOL_GPL(device_resume);
-
-
 /**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
+ *	unregister_dropped_devices - Unregister devices scheduled for removal
  *
- *	Interrupts must be disabled when calling this.
+ *	Unregister all devices on the dpm_destroy list.
  */
-
-static void dpm_power_up(void)
+static void unregister_dropped_devices(void)
 {
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
+	while (!list_empty(&dpm_destroy)) {
+		struct list_head *entry = dpm_destroy.next;
+		struct device *dev = to_device(entry);
+
+		up(&dev->sem);
+		/* This also removes the device from the list */
+		device_unregister(dev);
 	}
 }
 
-
 /**
- *	device_power_up - Turn on all devices that need special attention.
+ *	device_resume - Restore state of each device in system.
  *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
-void device_power_up(void)
+void device_resume(void)
 {
-	sysdev_resume();
-	dpm_power_up();
+	might_sleep();
+	dpm_resume();
+	unlock_all_devices();
+	unregister_dropped_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
-EXPORT_SYMBOL_GPL(device_power_up);
+EXPORT_SYMBOL_GPL(device_resume);
 
 
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +340,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +349,73 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
+ */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
+
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
+
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
  */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
 
-static int suspend_device(struct device * dev, pm_message_t state)
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		list_del_init(&dev->power.entry);
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			if (list_empty(&dev->power.entry))
+				list_add(&dev->power.entry, &dpm_off);
+			break;
+		}
+		if (list_empty(&dev->power.entry))
+			list_add(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +438,105 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
-	return error;
-}
-
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
- */
-static int suspend_device_late(struct device *dev, pm_message_t state)
-{
-	int error = 0;
-
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
-	}
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
  *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
  *
  *	(For historical reasons, if it returns -EAGAIN, that used to mean
  *	that the device would be called again with interrupts disabled.
  *	These days, we use the "suspend_late()" callback for that, so we
  *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
- *
  */
-
-int device_suspend(pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
 
-		get_device(dev);
+		list_del_init(&dev->power.entry);
 		mutex_unlock(&dpm_list_mtx);
-
 		error = suspend_device(dev, state);
-
-		mutex_lock(&dpm_list_mtx);
-
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
+		if (error) {
 			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
-		put_device(dev);
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			mutex_lock(&dpm_list_mtx);
+			if (list_empty(&dev->power.entry))
+				list_add(&dev->power.entry, &dpm_locked);
+			mutex_unlock(&dpm_list_mtx);
+			break;
+		}
+		mutex_lock(&dpm_list_mtx);
+		if (list_empty(&dev->power.entry))
+ 			list_add(&dev->power.entry, &dpm_off);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
 
-	mutex_unlock(&dpm_mtx);
 	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	lock_all_devices - Acquire every device's semaphore
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_power_down(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
+	mutex_lock(&dpm_list_mtx);
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+		down(&dev->sem);
+		mutex_lock(&dpm_list_mtx);
 
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
+		put_device(dev);
 	}
+	mutex_unlock(&dpm_list_mtx);
+}
 
-	error = sysdev_suspend(state);
- Done:
+/**
+ *	device_suspend - Save state and stop all devices in system.
+ *
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
+ */
+int device_suspend(pm_message_t state)
+{
+	int error;
+
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 20:37                                         ` Rafael J. Wysocki
@ 2008-01-07 21:32                                           ` Alan Stern
  2008-01-08  0:25                                             ` Rafael J. Wysocki
  2008-01-08  0:25                                             ` Rafael J. Wysocki
  2008-01-07 21:32                                           ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 21:32 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> > > Do you mean it might have been released already by another thread
> > > calling device_pm_destroy_suspended() on the same device?
> > 
> > I was thinking that it might be called before lock_all_devices().
> 
> I've added pm_sleep_start_end_mtx and the locking dance in
> device_pm_destroy_suspended() specifically to prevent this from happening.

Yes, I see.  What about the fact that device_suspend() locks 
pm_sleep_start_end_mtx first and pm_sleep_rwsem second, whereas 
device_pm_destroy_suspended() locks pm_sleep_start_end_mtx while 
holding pm_sleep_rwsem?  That should produce a lockdep warning.

> > However let's ignore that possibility and simplify the discussion by 
> > assuming that destroy_suspended_device() is never called except by a 
> > suspend or resume method for that device or one of its ancestors.  
> 
> It may also be called by one of the CPU hotplug notifiers.

This suggests another approach, simpler but not as general.  So far all
the problems we've seen have been associated with those CPU notifiers.  
Suppose the notifications about CPUs that failed to come back up were
delayed until after the resume was complete?  Drivers like msr would
then have to check in their resume handler whether the CPU was actually 
up, but no other changes would be needed.

In this way we could fix the immediate problem.  It wouldn't help with 
other sorts of devices that need to be unregistered during a suspend, 
though.

> Okay, well, now I'm leaning towards the asynchronous approach.
> 
> I'll prepare a new patch and send it later today.

Okay.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 20:37                                         ` Rafael J. Wysocki
  2008-01-07 21:32                                           ` Alan Stern
@ 2008-01-07 21:32                                           ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 21:32 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> > > Do you mean it might have been released already by another thread
> > > calling device_pm_destroy_suspended() on the same device?
> > 
> > I was thinking that it might be called before lock_all_devices().
> 
> I've added pm_sleep_start_end_mtx and the locking dance in
> device_pm_destroy_suspended() specifically to prevent this from happening.

Yes, I see.  What about the fact that device_suspend() locks 
pm_sleep_start_end_mtx first and pm_sleep_rwsem second, whereas 
device_pm_destroy_suspended() locks pm_sleep_start_end_mtx while 
holding pm_sleep_rwsem?  That should produce a lockdep warning.

> > However let's ignore that possibility and simplify the discussion by 
> > assuming that destroy_suspended_device() is never called except by a 
> > suspend or resume method for that device or one of its ancestors.  
> 
> It may also be called by one of the CPU hotplug notifiers.

This suggests another approach, simpler but not as general.  So far all
the problems we've seen have been associated with those CPU notifiers.  
Suppose the notifications about CPUs that failed to come back up were
delayed until after the resume was complete?  Drivers like msr would
then have to check in their resume handler whether the CPU was actually 
up, but no other changes would be needed.

In this way we could fix the immediate problem.  It wouldn't help with 
other sorts of devices that need to be unregistered during a suspend, 
though.

> Okay, well, now I'm leaning towards the asynchronous approach.
> 
> I'll prepare a new patch and send it later today.

Okay.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 19:29                                       ` Alan Stern
  2008-01-07 20:37                                         ` Rafael J. Wysocki
@ 2008-01-07 20:37                                         ` Rafael J. Wysocki
  2008-01-07 21:32                                           ` Alan Stern
  2008-01-07 21:32                                           ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 20:37 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Monday, 7 of January 2008, Alan Stern wrote:
> > > On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > > > current idea about how to do that.
> > > 
> > > It has some problems.
> > > 
> > > First, note that the list manipulations in dpm_suspend(), 
> > > device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> > > your patch could corrupt the list pointers.
> > 
> > Yes, they need the locking.  I have overlooked that, mostly because the locking
> > was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
> > > too (because you assumed there wouldn't be any need to remove a device during
> > a suspend, right?).
> 
> Right.
> 
> > > Are you assuming that no other threads can be running at this time?
> > 
> > No, I'm not.
> > 
> > > Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> > > doesn't know whether or not dev->sem was locked to begin with.
> > 
> > Do you mean it might have been released already by another thread
> > calling device_pm_destroy_suspended() on the same device?
> 
> I was thinking that it might be called before lock_all_devices().

I've added pm_sleep_start_end_mtx and the locking dance in
device_pm_destroy_suspended() specifically to prevent this from happening.

> However let's ignore that possibility and simplify the discussion by 
> assuming that destroy_suspended_device() is never called except by a 
> suspend or resume method for that device or one of its ancestors.  

It may also be called by one of the CPU hotplug notifiers.

> (This still leaves the possibility that it might get called by mistake 
> during a runtime suspend or resume...)
> 
> > > Do you want to rule out the possibility of a driver's suspend or remove 
> > > methods calling destroy_suspended_device() on its own device?  With 
> > > your synchronous approach, this would mean that the suspend/resume 
> > > method would indirectly end up calling the remove method.  This is 
> > > dangerous at best; with USB it would be a lockdep violation.  With an 
> > > asynchronous approach, on the other hand, this wouldn't be a problem.
> > 
> > > Well, the asynchronous approach has the problem that the device may end up
> > on a wrong list when removed by one of the .suspend() callbacks (and I don't
> > see how to avoid that without extra complexity).  Perhaps that's something we
> > can live with, though.
> 
> The same problem affects the synchronous approach.

No, it doesn't as of the $subject patch (the list_empty() tests should help).

> We can fix it by having dpm_suspend() do the list_move() before calling
> suspend_device().  Then if the suspend fails move the device back.

Yes, we can.

> > One more question: is there any particular reason not to call
> > device_pm_remove() at the beginning of device_del()?
> 
> I think it's done this way to avoid having a window where the device 
> isn't on a PM list and is still owned by the bus and the driver.  But 
> if a suspend occurs during that window, it shouldn't matter that the 
> device will be left unsuspended.  After all, the same thing would have 
> happened if the suspend occurred after bus_remove_device().
> 
> So no, there shouldn't be a problem with moving the call.

Okay, well, now I'm leaning towards the asynchronous approach.

I'll prepare a new patch and send it later today.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 19:29                                       ` Alan Stern
@ 2008-01-07 20:37                                         ` Rafael J. Wysocki
  2008-01-07 20:37                                         ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 20:37 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Monday, 7 of January 2008, Alan Stern wrote:
> > > On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > > > current idea about how to do that.
> > > 
> > > It has some problems.
> > > 
> > > First, note that the list manipulations in dpm_suspend(), 
> > > device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> > > your patch could corrupt the list pointers.
> > 
> > Yes, they need the locking.  I have overlooked that, mostly because the locking
> > was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
> > > too (because you assumed there wouldn't be any need to remove a device during
> > a suspend, right?).
> 
> Right.
> 
> > > Are you assuming that no other threads can be running at this time?
> > 
> > No, I'm not.
> > 
> > > Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> > > doesn't know whether or not dev->sem was locked to begin with.
> > 
> > Do you mean it might have been released already by another thread
> > calling device_pm_destroy_suspended() on the same device?
> 
> I was thinking that it might be called before lock_all_devices().

I've added pm_sleep_start_end_mtx and the locking dance in
device_pm_destroy_suspended() specifically to prevent this from happening.

> However let's ignore that possibility and simplify the discussion by 
> assuming that destroy_suspended_device() is never called except by a 
> suspend or resume method for that device or one of its ancestors.  

It may also be called by one of the CPU hotplug notifiers.

> (This still leaves the possibility that it might get called by mistake 
> during a runtime suspend or resume...)
> 
> > > Do you want to rule out the possibility of a driver's suspend or remove 
> > > methods calling destroy_suspended_device() on its own device?  With 
> > > your synchronous approach, this would mean that the suspend/resume 
> > > method would indirectly end up calling the remove method.  This is 
> > > dangerous at best; with USB it would be a lockdep violation.  With an 
> > > asynchronous approach, on the other hand, this wouldn't be a problem.
> > 
> > > Well, the asynchronous approach has the problem that the device may end up
> > on a wrong list when removed by one of the .suspend() callbacks (and I don't
> > see how to avoid that without extra complexity).  Perhaps that's something we
> > can live with, though.
> 
> The same problem affects the synchronous approach.

No, it doesn't as of the $subject patch (the list_empty() tests should help).

> We can fix it by having dpm_suspend() do the list_move() before calling
> suspend_device().  Then if the suspend fails move the device back.

Yes, we can.

> > One more question: is there any particular reason not to call
> > device_pm_remove() at the beginning of device_del()?
> 
> I think it's done this way to avoid having a window where the device 
> isn't on a PM list and is still owned by the bus and the driver.  But 
> if a suspend occurs during that window, it shouldn't matter that the 
> device will be left unsuspended.  After all, the same thing would have 
> happened if the suspend occurred after bus_remove_device().
> 
> So no, there shouldn't be a problem with moving the call.

Okay, well, now I'm leaning towards the asynchronous approach.

I'll prepare a new patch and send it later today.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 18:01                                     ` Rafael J. Wysocki
@ 2008-01-07 19:29                                       ` Alan Stern
  2008-01-07 20:37                                         ` Rafael J. Wysocki
  2008-01-07 20:37                                         ` Rafael J. Wysocki
  2008-01-07 19:29                                       ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 19:29 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> On Monday, 7 of January 2008, Alan Stern wrote:
> > On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > > current idea about how to do that.
> > 
> > It has some problems.
> > 
> > First, note that the list manipulations in dpm_suspend(), 
> > device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> > your patch could corrupt the list pointers.
> 
> Yes, they need the locking.  I have overlooked that, mostly because the locking
> was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
> > too (because you assumed there wouldn't be any need to remove a device during
> a suspend, right?).

Right.

> > Are you assuming that no other threads can be running at this time?
> 
> No, I'm not.
> 
> > Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> > doesn't know whether or not dev->sem was locked to begin with.
> 
> Do you mean it might have been released already by another thread
> calling device_pm_destroy_suspended() on the same device?

I was thinking that it might be called before lock_all_devices().

However let's ignore that possibility and simplify the discussion by 
assuming that destroy_suspended_device() is never called except by a 
suspend or resume method for that device or one of its ancestors.  
(This still leaves the possibility that it might get called by mistake 
during a runtime suspend or resume...)

> > Do you want to rule out the possibility of a driver's suspend or remove 
> > methods calling destroy_suspended_device() on its own device?  With 
> > your synchronous approach, this would mean that the suspend/resume 
> > method would indirectly end up calling the remove method.  This is 
> > dangerous at best; with USB it would be a lockdep violation.  With an 
> > asynchronous approach, on the other hand, this wouldn't be a problem.
> 
> > Well, the asynchronous approach has the problem that the device may end up
> on a wrong list when removed by one of the .suspend() callbacks (and I don't
> see how to avoid that without extra complexity).  Perhaps that's something we
> can live with, though.

The same problem affects the synchronous approach.  We can fix it by
having dpm_suspend() do the list_move() before calling
suspend_device().  Then if the suspend fails move the device back.

> One more question: is there any particular reason not to call
> device_pm_remove() at the beginning of device_del()?

I think it's done this way to avoid having a window where the device 
isn't on a PM list and is still owned by the bus and the driver.  But 
if a suspend occurs during that window, it shouldn't matter that the 
device will be left unsuspended.  After all, the same thing would have 
happened if the suspend occurred after bus_remove_device().

So no, there shouldn't be a problem with moving the call.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 18:01                                     ` Rafael J. Wysocki
  2008-01-07 19:29                                       ` Alan Stern
@ 2008-01-07 19:29                                       ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 19:29 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> On Monday, 7 of January 2008, Alan Stern wrote:
> > On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > > current idea about how to do that.
> > 
> > It has some problems.
> > 
> > First, note that the list manipulations in dpm_suspend(), 
> > device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> > your patch could corrupt the list pointers.
> 
> Yes, they need the locking.  I have overlooked that, mostly because the locking
> was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
> > too (because you assumed there wouldn't be any need to remove a device during
> a suspend, right?).

Right.

> > Are you assuming that no other threads can be running at this time?
> 
> No, I'm not.
> 
> > Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> > doesn't know whether or not dev->sem was locked to begin with.
> 
> Do you mean it might have been released already by another thread
> calling device_pm_destroy_suspended() on the same device?

I was thinking that it might be called before lock_all_devices().

However let's ignore that possibility and simplify the discussion by 
assuming that destroy_suspended_device() is never called except by a 
suspend or resume method for that device or one of its ancestors.  
(This still leaves the possibility that it might get called by mistake 
during a runtime suspend or resume...)

> > Do you want to rule out the possibility of a driver's suspend or remove 
> > methods calling destroy_suspended_device() on its own device?  With 
> > your synchronous approach, this would mean that the suspend/resume 
> > method would indirectly end up calling the remove method.  This is 
> > dangerous at best; with USB it would be a lockdep violation.  With an 
> > asynchronous approach, on the other hand, this wouldn't be a problem.
> 
> > Well, the asynchronous approach has the problem that the device may end up
> on a wrong list when removed by one of the .suspend() callbacks (and I don't
> see how to avoid that without extra complexity).  Perhaps that's something we
> can live with, though.

The same problem affects the synchronous approach.  We can fix it by
having dpm_suspend() do the list_move() before calling
suspend_device().  Then if the suspend fails move the device back.

> One more question: is there any particular reason not to call
> device_pm_remove() at the beginning of device_del()?

I think it's done this way to avoid having a window where the device 
isn't on a PM list and is still owned by the bus and the driver.  But 
if a suspend occurs during that window, it shouldn't matter that the 
device will be left unsuspended.  After all, the same thing would have 
happened if the suspend occurred after bus_remove_device().

So no, there shouldn't be a problem with moving the call.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 17:23                                   ` Alan Stern
  2008-01-07 18:01                                     ` Rafael J. Wysocki
@ 2008-01-07 18:01                                     ` Rafael J. Wysocki
  2008-01-07 19:29                                       ` Alan Stern
  2008-01-07 19:29                                       ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 18:01 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > current idea about how to do that.
> 
> It has some problems.
> 
> First, note that the list manipulations in dpm_suspend(), 
> device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> your patch could corrupt the list pointers.

Yes, they need the locking.  I have overlooked that, mostly because the locking
was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
too (because you assumed there wouldn't be any need to remove a device during
a suspend, right?).

> Are you assuming that no other threads can be running at this time?

No, I'm not.

> Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> doesn't know whether or not dev->sem was locked to begin with.

Do you mean it might have been released already by another thread
calling device_pm_destroy_suspended() on the same device?

> Do you want to rule out the possibility of a driver's suspend or remove 
> methods calling destroy_suspended_device() on its own device?  With 
> your synchronous approach, this would mean that the suspend/resume 
> method would indirectly end up calling the remove method.  This is 
> dangerous at best; with USB it would be a lockdep violation.  With an 
> asynchronous approach, on the other hand, this wouldn't be a problem.

Well, the asynchronous approach has the problem that the device may end up
on a wrong list when removed by one of the .suspend() callbacks (and I don't
see how to avoid that without extra complexity).  Perhaps that's something we
can live with, though.

One more question: is there any particular reason not to call
device_pm_remove() at the beginning of device_del()?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 17:23                                   ` Alan Stern
@ 2008-01-07 18:01                                     ` Rafael J. Wysocki
  2008-01-07 18:01                                     ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 18:01 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Monday, 7 of January 2008, Alan Stern wrote:
> On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> > current idea about how to do that.
> 
> It has some problems.
> 
> First, note that the list manipulations in dpm_suspend(), 
> device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
> your patch could corrupt the list pointers.

Yes, they need the locking.  I have overlooked that, mostly because the locking
was removed by gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch
too (because you assumed there wouldn't be any need to remove a device during
a suspend, right?).

> Are you assuming that no other threads can be running at this time?

No, I'm not.

> Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
> doesn't know whether or not dev->sem was locked to begin with.

Do you mean it might have been released already by another thread
calling device_pm_destroy_suspended() on the same device?

> Do you want to rule out the possibility of a driver's suspend or remove 
> methods calling destroy_suspended_device() on its own device?  With 
> your synchronous approach, this would mean that the suspend/resume 
> method would indirectly end up calling the remove method.  This is 
> dangerous at best; with USB it would be a lockdep violation.  With an 
> asynchronous approach, on the other hand, this wouldn't be a problem.

Well, the asynchronous approach has the problem that the device may end up
on a wrong list when removed by one of the .suspend() callbacks (and I don't
see how to avoid that without extra complexity).  Perhaps that's something we
can live with, though.

One more question: is there any particular reason not to call
device_pm_remove() at the beginning of device_del()?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 16:51                                 ` Rafael J. Wysocki
@ 2008-01-07 17:23                                   ` Alan Stern
  2008-01-07 18:01                                     ` Rafael J. Wysocki
  2008-01-07 18:01                                     ` Rafael J. Wysocki
  2008-01-07 17:23                                   ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 17:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> current idea about how to do that.

It has some problems.

First, note that the list manipulations in dpm_suspend(), 
device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
your patch could corrupt the list pointers.  Are you assuming that no 
other threads can be running at this time?

Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
doesn't know whether or not dev->sem was locked to begin with.

Do you want to rule out the possibility of a driver's suspend or remove 
methods calling destroy_suspended_device() on its own device?  With 
your synchronous approach, this would mean that the suspend/resume 
method would indirectly end up calling the remove method.  This is 
dangerous at best; with USB it would be a lockdep violation.  With an 
asynchronous approach, on the other hand, this wouldn't be a problem.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 16:51                                 ` Rafael J. Wysocki
  2008-01-07 17:23                                   ` Alan Stern
@ 2008-01-07 17:23                                   ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 17:23 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Mon, 7 Jan 2008, Rafael J. Wysocki wrote:

> Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
> current idea about how to do that.

It has some problems.

First, note that the list manipulations in dpm_suspend(), 
device_power_down(), and so on aren't protected by dpm_list_mtx.  So 
your patch could corrupt the list pointers.  Are you assuming that no 
other threads can be running at this time?

Note also that device_pm_destroy_suspended() does up(&dev->sem), but it 
doesn't know whether or not dev->sem was locked to begin with.

Do you want to rule out the possibility of a driver's suspend or remove 
methods calling destroy_suspended_device() on its own device?  With 
your synchronous approach, this would mean that the suspend/resume 
method would indirectly end up calling the remove method.  This is 
dangerous at best; with USB it would be a lockdep violation.  With an 
asynchronous approach, on the other hand, this wouldn't be a problem.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 16:16                               ` Alan Stern
@ 2008-01-07 16:51                                 ` Rafael J. Wysocki
  2008-01-07 17:23                                   ` Alan Stern
  2008-01-07 17:23                                   ` Alan Stern
  2008-01-07 16:51                                 ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 16:51 UTC (permalink / raw)
  To: Alan Stern
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Monday, 7 of January 2008, Alan Stern wrote:
> Let's try to summarize the main issues here:
> 
>      1. We want the PM core to lock all devices during suspend and
> 	hibernation.  This implies that registration and unregistration
> 	at such times can't work, because they need to lock the
> 	device sem in order to make probe and remove method calls.
> 
>      2. Registration calls can be failed, with an error message in the
> 	system log.  However unregistration calls cannot fail.  They
> 	_can_ block until the system resumes, but if the unregistration
> 	call was made from within a suspend or resume method it will
> 	deadlock.  This seems inescapable, but at least we should print
> 	an error in the log so the offending driver can be identified.
> 
>      3. In response to 2, the PM core should have a special routine for
> 	unregistering devices while a suspend is in progress.  Rafael
> 	proposed that the core should unlock the device to permit the
> 	call to go through.  This seems dangerous to me; I would prefer
> 	to leave the locks in place and defer the unregistration until
> 	after the system is back up and the locks have all been 
> 	dropped.  This would avoid all sorts of locking, deadlock, and 
> 	mutual exclusion problems.
> 
> (As a side note: destroy_suspended_device() has a rather limited
> interface anyway, since it can handle only devices that were created by
> create_device().)
> 
>      4. Rafael pointed out that unregistration can occur concurrently
> 	with system suspend.  When this happens we can end up trying to
> 	suspend a device which has already been through 
> 	bus_remove_device(), because it hasn't yet been removed from 
> 	the dpm_active list.  He proposes we make unregistration block
> 	system suspend, just as registration does.
> 
> I don't see 4 as a real problem.  Starting an unregistration before
> the suspend and finishing it afterward should be okay.  Once a device
> has gone through bus_remove_device() it hasn't got a suspend method any
> more, so trying to suspend it won't do anything at all -- the tests in
> suspend_device() will all fail.  Conversely, if bus_remove_device()  
> hasn't run yet then we would end up calling the driver's suspend method
> before the device_del() call returns.  As Johannes pointed out, this is
> a normal race that would exist anyway.
> 
> On the other hand, having unregistration block system suspend wouldn't 
> actually be wrong.  I simply don't think it is necessary.  But note 
> that making the two mutually exclusive would complicate Rafael's 
> synchronous approach for destroy_suspended_device().
> 
>      5. All the discussion about pm_sleep_rwsem and so on is 
> 	implementation details.  Once we have settled on the correct
> 	approach for 1-4, the implementation should be relatively easy.

Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
current idea about how to do that.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-07 16:16                               ` Alan Stern
  2008-01-07 16:51                                 ` Rafael J. Wysocki
@ 2008-01-07 16:51                                 ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-07 16:51 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

On Monday, 7 of January 2008, Alan Stern wrote:
> Let's try to summarize the main issues here:
> 
>      1. We want the PM core to lock all devices during suspend and
> 	hibernation.  This implies that registration and unregistration
> 	at such times can't work, because they need to lock the
> 	device sem in order to make probe and remove method calls.
> 
>      2. Registration calls can be failed, with an error message in the
> 	system log.  However unregistration calls cannot fail.  They
> 	_can_ block until the system resumes, but if the unregistration
> 	call was made from within a suspend or resume method it will
> 	deadlock.  This seems inescapable, but at least we should print
> 	an error in the log so the offending driver can be identified.
> 
>      3. In response to 2, the PM core should have a special routine for
> 	unregistering devices while a suspend is in progress.  Rafael
> 	proposed that the core should unlock the device to permit the
> 	call to go through.  This seems dangerous to me; I would prefer
> 	to leave the locks in place and defer the unregistration until
> 	after the system is back up and the locks have all been 
> 	dropped.  This would avoid all sorts of locking, deadlock, and 
> 	mutual exclusion problems.
> 
> (As a side note: destroy_suspended_device() has a rather limited
> interface anyway, since it can handle only devices that were created by
> create_device().)
> 
>      4. Rafael pointed out that unregistration can occur concurrently
> 	with system suspend.  When this happens we can end up trying to
> 	suspend a device which has already been through 
> 	bus_remove_device(), because it hasn't yet been removed from 
> 	the dpm_active list.  He proposes we make unregistration block
> 	system suspend, just as registration does.
> 
> I don't see 4 as a real problem.  Starting an unregistration before
> the suspend and finishing it afterward should be okay.  Once a device
> has gone through bus_remove_device() it hasn't got a suspend method any
> more, so trying to suspend it won't do anything at all -- the tests in
> suspend_device() will all fail.  Conversely, if bus_remove_device()  
> hasn't run yet then we would end up calling the driver's suspend method
> before the device_del() call returns.  As Johannes pointed out, this is
> a normal race that would exist anyway.
> 
> On the other hand, having unregistration block system suspend wouldn't 
> actually be wrong.  I simply don't think it is necessary.  But note 
> that making the two mutually exclusive would complicate Rafael's 
> synchronous approach for destroy_suspended_device().
> 
>      5. All the discussion about pm_sleep_rwsem and so on is 
> 	implementation details.  Once we have settled on the correct
> 	approach for 1-4, the implementation should be relatively easy.

Please see the patch at: http://lkml.org/lkml/2008/1/6/298 .  It represents my
current idea about how to do that.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:47                             ` Rafael J. Wysocki
@ 2008-01-07 16:16                               ` Alan Stern
  2008-01-07 16:51                                 ` Rafael J. Wysocki
  2008-01-07 16:51                                 ` Rafael J. Wysocki
  2008-01-07 16:16                               ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 16:16 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Johannes Berg, Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

Let's try to summarize the main issues here:

     1. We want the PM core to lock all devices during suspend and
	hibernation.  This implies that registration and unregistration
	at such times can't work, because they need to lock the
	device sem in order to make probe and remove method calls.

     2. Registration calls can be failed, with an error message in the
	system log.  However unregistration calls cannot fail.  They
	_can_ block until the system resumes, but if the unregistration
	call was made from within a suspend or resume method it will
	deadlock.  This seems inescapable, but at least we should print
	an error in the log so the offending driver can be identified.

     3. In response to 2, the PM core should have a special routine for
	unregistering devices while a suspend is in progress.  Rafael
	proposed that the core should unlock the device to permit the
	call to go through.  This seems dangerous to me; I would prefer
	to leave the locks in place and defer the unregistration until
	after the system is back up and the locks have all been 
	dropped.  This would avoid all sorts of locking, deadlock, and 
	mutual exclusion problems.

(As a side note: destroy_suspended_device() has a rather limited
interface anyway, since it can handle only devices that were created by
create_device().)

     4. Rafael pointed out that unregistration can occur concurrently
	with system suspend.  When this happens we can end up trying to
	suspend a device which has already been through 
	bus_remove_device(), because it hasn't yet been removed from 
	the dpm_active list.  He proposes we make unregistration block
	system suspend, just as registration does.

I don't see 4 as a real problem.  Starting an unregistration before
the suspend and finishing it afterward should be okay.  Once a device
has gone through bus_remove_device() it hasn't got a suspend method any
more, so trying to suspend it won't do anything at all -- the tests in
suspend_device() will all fail.  Conversely, if bus_remove_device()  
hasn't run yet then we would end up calling the driver's suspend method
before the device_del() call returns.  As Johannes pointed out, this is
a normal race that would exist anyway.

On the other hand, having unregistration block system suspend wouldn't 
actually be wrong.  I simply don't think it is necessary.  But note 
that making the two mutually exclusive would complicate Rafael's 
synchronous approach for destroy_suspended_device().

     5. All the discussion about pm_sleep_rwsem and so on is 
	implementation details.  Once we have settled on the correct
	approach for 1-4, the implementation should be relatively easy.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:47                             ` Rafael J. Wysocki
  2008-01-07 16:16                               ` Alan Stern
@ 2008-01-07 16:16                               ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-07 16:16 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, Ingo Molnar, pm list,
	Johannes Berg, Andrew Morton

Let's try to summarize the main issues here:

     1. We want the PM core to lock all devices during suspend and
	hibernation.  This implies that registration and unregistration
	at such times can't work, because they need to lock the
	device sem in order to make probe and remove method calls.

     2. Registration calls can be failed, with an error message in the
	system log.  However unregistration calls cannot fail.  They
	_can_ block until the system resumes, but if the unregistration
	call was made from within a suspend or resume method it will
	deadlock.  This seems inescapable, but at least we should print
	an error in the log so the offending driver can be identified.

     3. In response to 2, the PM core should have a special routine for
	unregistering devices while a suspend is in progress.  Rafael
	proposed that the core should unlock the device to permit the
	call to go through.  This seems dangerous to me; I would prefer
	to leave the locks in place and defer the unregistration until
	after the system is back up and the locks have all been 
	dropped.  This would avoid all sorts of locking, deadlock, and 
	mutual exclusion problems.

(As a side note: destroy_suspended_device() has a rather limited
interface anyway, since it can handle only devices that were created by
create_device().)

     4. Rafael pointed out that unregistration can occur concurrently
	with system suspend.  When this happens we can end up trying to
	suspend a device which has already been through 
	bus_remove_device(), because it hasn't yet been removed from 
	the dpm_active list.  He proposes we make unregistration block
	system suspend, just as registration does.

I don't see 4 as a real problem.  Starting an unregistration before
the suspend and finishing it afterward should be okay.  Once a device
has gone through bus_remove_device() it hasn't got a suspend method any
more, so trying to suspend it won't do anything at all -- the tests in
suspend_device() will all fail.  Conversely, if bus_remove_device()  
hasn't run yet then we would end up calling the driver's suspend method
before the device_del() call returns.  As Johannes pointed out, this is
a normal race that would exist anyway.

On the other hand, having unregistration block system suspend wouldn't 
actually be wrong.  I simply don't think it is necessary.  But note 
that making the two mutually exclusive would complicate Rafael's 
synchronous approach for destroy_suspended_device().

     5. All the discussion about pm_sleep_rwsem and so on is 
	implementation details.  Once we have settled on the correct
	approach for 1-4, the implementation should be relatively easy.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:39                           ` Alan Stern
@ 2008-01-06 22:47                             ` Rafael J. Wysocki
  2008-01-07 16:16                               ` Alan Stern
  2008-01-07 16:16                               ` Alan Stern
  2008-01-06 22:47                             ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:47 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > No -- the whole idea here is to print an error message in the system
> > > log if a driver's resume method tries to call device_del().  Deadlock 
> > > is unavoidable in this case, but at least we'll know which driver is 
> > > guilty.
> > 
> > Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
> > any more.
> 
> There's a window in lock_all_devices() when dpm_list_mtx isn't held.  
> We don't want device_pm_remove() taking an already-locked device off 
> the dpm_locked list at that time.  So we do need to acquire dev->sem in 
> device_pm_remove().

Not if pm_sleep_rwsem is held by device_del(), since in that case we won't
reach lock_all_devices() (device_add() calls device_pm_remove() under
pm_sleep_rwsem already).

> > Apart from this, by acquiring pm_sleep_rwsem for reading in
> > device_del() we can prevent a suspend from starting while the device is being
> > removed.
> > 
> > Consider, for example, the scenario possible with the $subject patch:
> > - device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
> >   not printed
> > - it proceeds and everything before device_pm_remove() succeeds
> > - now, device_suspend() is called and locks dev->sem
> > - device_del() calls device_pm_remove() and blocks on that with the device
> >   partially removed
> > I think we should prevent this from happening.
> 
> I don't see anything wrong with it.  All that will happen is that the 
> removal will start before the suspend and finish after the resume.

In that case, we'll attempt to call the device's .suspend() and .resume()
routines, but we shouldn't do that, IMHO.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:39                           ` Alan Stern
  2008-01-06 22:47                             ` Rafael J. Wysocki
@ 2008-01-06 22:47                             ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:47 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > No -- the whole idea here is to print an error message in the system
> > > log if a driver's resume method tries to call device_del().  Deadlock 
> > > is unavoidable in this case, but at least we'll know which driver is 
> > > guilty.
> > 
> > Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
> > any more.
> 
> There's a window in lock_all_devices() when dpm_list_mtx isn't held.  
> We don't want device_pm_remove() taking an already-locked device off 
> the dpm_locked list at that time.  So we do need to acquire dev->sem in 
> device_pm_remove().

Not if pm_sleep_rwsem is held by device_del(), since in that case we won't
reach lock_all_devices() (device_add() calls device_pm_remove() under
pm_sleep_rwsem already).

> > Apart from this, by acquiring pm_sleep_rwsem for reading in
> > device_del() we can prevent a suspend from starting while the device is being
> > removed.
> > 
> > Consider, for example, the scenario possible with the $subject patch:
> > - device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
> >   not printed
> > - it proceeds and everything before device_pm_remove() succeeds
> > - now, device_suspend() is called and locks dev->sem
> > - device_del() calls device_pm_remove() and blocks on that with the device
> >   partially removed
> > I think we should prevent this from happening.
> 
> I don't see anything wrong with it.  All that will happen is that the 
> removal will start before the suspend and finish after the resume.

In that case, we'll attempt to call the device's .suspend() and .resume()
routines, but we shouldn't do that, IMHO.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:34                         ` Rafael J. Wysocki
  2008-01-06 22:39                           ` Alan Stern
@ 2008-01-06 22:39                           ` Alan Stern
  2008-01-06 22:47                             ` Rafael J. Wysocki
  2008-01-06 22:47                             ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > No -- the whole idea here is to print an error message in the system
> > log if a driver's resume method tries to call device_del().  Deadlock 
> > is unavoidable in this case, but at least we'll know which driver is 
> > guilty.
> 
> Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
> any more.

There's a window in lock_all_devices() when dpm_list_mtx isn't held.  
We don't want device_pm_remove() taking an already-locked device off 
the dpm_locked list at that time.  So we do need to acquire dev->sem in 
device_pm_remove().

> Apart from this, by acquiring pm_sleep_rwsem for reading in
> device_del() we can prevent a suspend from starting while the device is being
> removed.
> 
> Consider, for example, the scenario possible with the $subject patch:
> - device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
>   not printed
> - it proceeds and everything before device_pm_remove() succeeds
> - now, device_suspend() is called and locks dev->sem
> - device_del() calls device_pm_remove() and blocks on that with the device
>   partially removed
> I think we should prevent this from happening.

I don't see anything wrong with it.  All that will happen is that the 
removal will start before the suspend and finish after the resume.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:34                         ` Rafael J. Wysocki
@ 2008-01-06 22:39                           ` Alan Stern
  2008-01-06 22:39                           ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > No -- the whole idea here is to print an error message in the system
> > log if a driver's resume method tries to call device_del().  Deadlock 
> > is unavoidable in this case, but at least we'll know which driver is 
> > guilty.
> 
> Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
> any more.

There's a window in lock_all_devices() when dpm_list_mtx isn't held.  
We don't want device_pm_remove() taking an already-locked device off 
the dpm_locked list at that time.  So we do need to acquire dev->sem in 
device_pm_remove().

> Apart from this, by acquiring pm_sleep_rwsem for reading in
> device_del() we can prevent a suspend from starting while the device is being
> removed.
> 
> Consider, for example, the scenario possible with the $subject patch:
> - device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
>   not printed
> - it proceeds and everything before device_pm_remove() succeeds
> - now, device_suspend() is called and locks dev->sem
> - device_del() calls device_pm_remove() and blocks on that with the device
>   partially removed
> I think we should prevent this from happening.

I don't see anything wrong with it.  All that will happen is that the 
removal will start before the suspend and finish after the resume.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:21                       ` Alan Stern
@ 2008-01-06 22:34                         ` Rafael J. Wysocki
  2008-01-06 22:39                           ` Alan Stern
  2008-01-06 22:39                           ` Alan Stern
  2008-01-06 22:34                         ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:34 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > Still, shouldn't we fail the removal of the device apart from giving the
> > > warning?
> > 
> > Actually, having thought about it a bit more, I don't see the point in
> > preventing the removal of the device from the list in device_pm_remove() if
> > we allow all of the operations in device_del() preceding it to be performed.
> 
> That's not the issue.  We _don't_ allow all of the operations in 
> device_del() preceding the call to device_pm_remove().  In particular, 
> the call to the device's driver's remove method will deadlock because 
> device_release_driver() always has to acquire dev->sem.
> 
> > Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> > if locked?
> 
> No -- the whole idea here is to print an error message in the system
> log if a driver's resume method tries to call device_del().  Deadlock 
> is unavoidable in this case, but at least we'll know which driver is 
> guilty.

Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
any more.  Apart from this, by acquiring pm_sleep_rwsem for reading in
device_del() we can prevent a suspend from starting while the device is being
removed.

Consider, for example, the scenario possible with the $subject patch:
- device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
  not printed
- it proceeds and everything before device_pm_remove() succeeds
- now, device_suspend() is called and locks dev->sem
- device_del() calls device_pm_remove() and blocks on that with the device
  partially removed
I think we should prevent this from happening.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:21                       ` Alan Stern
  2008-01-06 22:34                         ` Rafael J. Wysocki
@ 2008-01-06 22:34                         ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:34 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > Still, shouldn't we fail the removal of the device apart from giving the
> > > warning?
> > 
> > Actually, having thought about it a bit more, I don't see the point in
> > preventing the removal of the device from the list in device_pm_remove() if
> > we allow all of the operations in device_del() preceding it to be performed.
> 
> That's not the issue.  We _don't_ allow all of the operations in 
> device_del() preceding the call to device_pm_remove().  In particular, 
> the call to the device's driver's remove method will deadlock because 
> device_release_driver() always has to acquire dev->sem.
> 
> > Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> > if locked?
> 
> No -- the whole idea here is to print an error message in the system
> log if a driver's resume method tries to call device_del().  Deadlock 
> is unavoidable in this case, but at least we'll know which driver is 
> guilty.

Still, if we do that, we won't need to acquire dev->sem in device_pm_remove()
any more.  Apart from this, by acquiring pm_sleep_rwsem for reading in
device_del() we can prevent a suspend from starting while the device is being
removed.

Consider, for example, the scenario possible with the $subject patch:
- device_del() starts and notices pm_sleep_rwsem unlocked, so the warning is
  not printed
- it proceeds and everything before device_pm_remove() succeeds
- now, device_suspend() is called and locks dev->sem
- device_del() calls device_pm_remove() and blocks on that with the device
  partially removed
I think we should prevent this from happening.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:24                       ` Rafael J. Wysocki
  2008-01-06 22:31                         ` Alan Stern
@ 2008-01-06 22:31                         ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> Still, our present approach doesn't seem to be correct overall.  For example,
> I think we should prevent a suspend from happening while a device is being
> removed.

We could, however I don't think it's dangerous to allow it.  The two
problems to avoid are (1) messing up the PM device list pointers, and
(2) calling a driver's suspend/resume methods while its remove method
is running.  (1) is handled by the pm_list_mutex and (2) is handled by
locking dev->sem.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:24                       ` Rafael J. Wysocki
@ 2008-01-06 22:31                         ` Alan Stern
  2008-01-06 22:31                         ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:31 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> Still, our present approach doesn't seem to be correct overall.  For example,
> I think we should prevent a suspend from happening while a device is being
> removed.

We could, however I don't think it's dangerous to allow it.  The two
problems to avoid are (1) messing up the PM device list pointers, and
(2) calling a driver's suspend/resume methods while its remove method
is running.  (1) is handled by the pm_list_mutex and (2) is handled by
locking dev->sem.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:11                     ` Alan Stern
@ 2008-01-06 22:24                       ` Rafael J. Wysocki
  2008-01-06 22:31                         ` Alan Stern
  2008-01-06 22:31                         ` Alan Stern
  2008-01-06 22:24                       ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:24 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Sunday, 6 of January 2008, Alan Stern wrote:
> 
> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> We can't.  device_del() can't fail -- it returns void.  Besides, how 
> can a driver hope to deal with an unregistration failure?

Well, right.

Still, our present approach doesn't seem to be correct overall.  For example,
I think we should prevent a suspend from happening while a device is being
removed.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 22:11                     ` Alan Stern
  2008-01-06 22:24                       ` Rafael J. Wysocki
@ 2008-01-06 22:24                       ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:24 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Sunday, 6 of January 2008, Alan Stern wrote:
> 
> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> We can't.  device_del() can't fail -- it returns void.  Besides, how 
> can a driver hope to deal with an unregistration failure?

Well, right.

Still, our present approach doesn't seem to be correct overall.  For example,
I think we should prevent a suspend from happening while a device is being
removed.

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:57                     ` Rafael J. Wysocki
                                         ` (2 preceding siblings ...)
  2008-01-06 22:21                       ` Alan Stern
@ 2008-01-06 22:21                       ` Alan Stern
  2008-01-06 22:34                         ` Rafael J. Wysocki
  2008-01-06 22:34                         ` Rafael J. Wysocki
  3 siblings, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:21 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> Actually, having thought about it a bit more, I don't see the point in
> preventing the removal of the device from the list in device_pm_remove() if
> we allow all of the operations in device_del() preceding it to be performed.

That's not the issue.  We _don't_ allow all of the operations in 
device_del() preceding the call to device_pm_remove().  In particular, 
the call to the device's driver's remove method will deadlock because 
device_release_driver() always has to acquire dev->sem.

> Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> if locked?

No -- the whole idea here is to print an error message in the system
log if a driver's resume method tries to call device_del().  Deadlock 
is unavoidable in this case, but at least we'll know which driver is 
guilty.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:57                     ` Rafael J. Wysocki
  2008-01-06 22:19                       ` Rafael J. Wysocki
  2008-01-06 22:19                       ` Rafael J. Wysocki
@ 2008-01-06 22:21                       ` Alan Stern
  2008-01-06 22:21                       ` Alan Stern
  3 siblings, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:21 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> Actually, having thought about it a bit more, I don't see the point in
> preventing the removal of the device from the list in device_pm_remove() if
> we allow all of the operations in device_del() preceding it to be performed.

That's not the issue.  We _don't_ allow all of the operations in 
device_del() preceding the call to device_pm_remove().  In particular, 
the call to the device's driver's remove method will deadlock because 
device_release_driver() always has to acquire dev->sem.

> Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> if locked?

No -- the whole idea here is to print an error message in the system
log if a driver's resume method tries to call device_del().  Deadlock 
is unavoidable in this case, but at least we'll know which driver is 
guilty.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:57                     ` Rafael J. Wysocki
@ 2008-01-06 22:19                       ` Rafael J. Wysocki
  2008-01-06 22:19                       ` Rafael J. Wysocki
                                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> > On Sunday, 6 of January 2008, Alan Stern wrote:
> > > On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > If you can figure out a way to disable the warning in device_del() for 
> > > > > just the one device being unregistered by 
> > > > > device_pm_destroy_suspended(),
> > > > 
> > > > Something like this, perhaps:
> > > > 
> > > > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> > > >  	struct device * parent = dev->parent;
> > > >  	struct class_interface *class_intf;
> > > >  
> > > > +	if (down_trylock(&dev->sem)) {
> > > > +		if (pm_sleep_lock()) {
> > > > +			dev_warn(dev, "Illegal %s during suspend\n",
> > > > +				__FUNCTION__);
> > > > +			dump_stack();
> > > > +		} else {
> > > > +			pm_sleep_unlock();
> > > > +		}
> > > > +	} else {
> > > > +		up(&dev->sem);
> > > > +	}
> > > > +
> > > >  	if (parent)
> > > >  		klist_del(&dev->knode_parent);
> > > >  	if (MAJOR(dev->devt))
> > > 
> > > Bizarre, but it should work.
> > 
> > OK
> > 
> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> Actually, having thought about it a bit more, I don't see the point in
> preventing the removal of the device from the list in device_pm_remove() if
> we allow all of the operations in device_del() preceding it to be performed.
> 
> Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> if locked?

Ugh, the $subject patch looks like a city of races.  I'm struggling to close
them all, but it's getting complicated.

I'll post the result in a new thread.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:57                     ` Rafael J. Wysocki
  2008-01-06 22:19                       ` Rafael J. Wysocki
@ 2008-01-06 22:19                       ` Rafael J. Wysocki
  2008-01-06 22:21                       ` Alan Stern
  2008-01-06 22:21                       ` Alan Stern
  3 siblings, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 22:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> > On Sunday, 6 of January 2008, Alan Stern wrote:
> > > On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > If you can figure out a way to disable the warning in device_del() for 
> > > > > just the one device being unregistered by 
> > > > > device_pm_destroy_suspended(),
> > > > 
> > > > Something like this, perhaps:
> > > > 
> > > > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> > > >  	struct device * parent = dev->parent;
> > > >  	struct class_interface *class_intf;
> > > >  
> > > > +	if (down_trylock(&dev->sem)) {
> > > > +		if (pm_sleep_lock()) {
> > > > +			dev_warn(dev, "Illegal %s during suspend\n",
> > > > +				__FUNCTION__);
> > > > +			dump_stack();
> > > > +		} else {
> > > > +			pm_sleep_unlock();
> > > > +		}
> > > > +	} else {
> > > > +		up(&dev->sem);
> > > > +	}
> > > > +
> > > >  	if (parent)
> > > >  		klist_del(&dev->knode_parent);
> > > >  	if (MAJOR(dev->devt))
> > > 
> > > Bizarre, but it should work.
> > 
> > OK
> > 
> > Still, shouldn't we fail the removal of the device apart from giving the
> > warning?
> 
> Actually, having thought about it a bit more, I don't see the point in
> preventing the removal of the device from the list in device_pm_remove() if
> we allow all of the operations in device_del() preceding it to be performed.
> 
> Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
> if locked?

Ugh, the $subject patch looks like a city of races.  I'm struggling to close
them all, but it's getting complicated.

I'll post the result in a new thread.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:05                   ` Rafael J. Wysocki
  2008-01-06 19:57                     ` Rafael J. Wysocki
  2008-01-06 19:57                     ` Rafael J. Wysocki
@ 2008-01-06 22:11                     ` Alan Stern
  2008-01-06 22:24                       ` Rafael J. Wysocki
  2008-01-06 22:24                       ` Rafael J. Wysocki
  2008-01-06 22:11                     ` Alan Stern
  3 siblings, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:11 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> On Sunday, 6 of January 2008, Alan Stern wrote:

> Still, shouldn't we fail the removal of the device apart from giving the
> warning?

We can't.  device_del() can't fail -- it returns void.  Besides, how 
can a driver hope to deal with an unregistration failure?

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:05                   ` Rafael J. Wysocki
                                       ` (2 preceding siblings ...)
  2008-01-06 22:11                     ` Alan Stern
@ 2008-01-06 22:11                     ` Alan Stern
  3 siblings, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 22:11 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> On Sunday, 6 of January 2008, Alan Stern wrote:

> Still, shouldn't we fail the removal of the device apart from giving the
> warning?

We can't.  device_del() can't fail -- it returns void.  Besides, how 
can a driver hope to deal with an unregistration failure?

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:05                   ` Rafael J. Wysocki
@ 2008-01-06 19:57                     ` Rafael J. Wysocki
  2008-01-06 22:19                       ` Rafael J. Wysocki
                                         ` (3 more replies)
  2008-01-06 19:57                     ` Rafael J. Wysocki
                                       ` (2 subsequent siblings)
  3 siblings, 4 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 19:57 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> On Sunday, 6 of January 2008, Alan Stern wrote:
> > On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > If you can figure out a way to disable the warning in device_del() for 
> > > > just the one device being unregistered by 
> > > > device_pm_destroy_suspended(),
> > > 
> > > Something like this, perhaps:
> > > 
> > > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> > >  	struct device * parent = dev->parent;
> > >  	struct class_interface *class_intf;
> > >  
> > > +	if (down_trylock(&dev->sem)) {
> > > +		if (pm_sleep_lock()) {
> > > +			dev_warn(dev, "Illegal %s during suspend\n",
> > > +				__FUNCTION__);
> > > +			dump_stack();
> > > +		} else {
> > > +			pm_sleep_unlock();
> > > +		}
> > > +	} else {
> > > +		up(&dev->sem);
> > > +	}
> > > +
> > >  	if (parent)
> > >  		klist_del(&dev->knode_parent);
> > >  	if (MAJOR(dev->devt))
> > 
> > Bizarre, but it should work.
> 
> OK
> 
> Still, shouldn't we fail the removal of the device apart from giving the
> warning?

Actually, having thought about it a bit more, I don't see the point in
preventing the removal of the device from the list in device_pm_remove() if
we allow all of the operations in device_del() preceding it to be performed.

Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
if locked?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 19:05                   ` Rafael J. Wysocki
  2008-01-06 19:57                     ` Rafael J. Wysocki
@ 2008-01-06 19:57                     ` Rafael J. Wysocki
  2008-01-06 22:11                     ` Alan Stern
  2008-01-06 22:11                     ` Alan Stern
  3 siblings, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 19:57 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Rafael J. Wysocki wrote:
> On Sunday, 6 of January 2008, Alan Stern wrote:
> > On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > If you can figure out a way to disable the warning in device_del() for 
> > > > just the one device being unregistered by 
> > > > device_pm_destroy_suspended(),
> > > 
> > > Something like this, perhaps:
> > > 
> > > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> > >  	struct device * parent = dev->parent;
> > >  	struct class_interface *class_intf;
> > >  
> > > +	if (down_trylock(&dev->sem)) {
> > > +		if (pm_sleep_lock()) {
> > > +			dev_warn(dev, "Illegal %s during suspend\n",
> > > +				__FUNCTION__);
> > > +			dump_stack();
> > > +		} else {
> > > +			pm_sleep_unlock();
> > > +		}
> > > +	} else {
> > > +		up(&dev->sem);
> > > +	}
> > > +
> > >  	if (parent)
> > >  		klist_del(&dev->knode_parent);
> > >  	if (MAJOR(dev->devt))
> > 
> > Bizarre, but it should work.
> 
> OK
> 
> Still, shouldn't we fail the removal of the device apart from giving the
> warning?

Actually, having thought about it a bit more, I don't see the point in
preventing the removal of the device from the list in device_pm_remove() if
we allow all of the operations in device_del() preceding it to be performed.

Shouldn't we just take pm_sleep_rwsem in device_del() upfront and block on that
if locked?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 17:06                 ` Alan Stern
@ 2008-01-06 19:05                   ` Rafael J. Wysocki
  2008-01-06 19:57                     ` Rafael J. Wysocki
                                       ` (3 more replies)
  2008-01-06 19:05                   ` Rafael J. Wysocki
  1 sibling, 4 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 19:05 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > If you can figure out a way to disable the warning in device_del() for 
> > > just the one device being unregistered by 
> > > device_pm_destroy_suspended(),
> > 
> > Something like this, perhaps:
> > 
> > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> >  	struct device * parent = dev->parent;
> >  	struct class_interface *class_intf;
> >  
> > +	if (down_trylock(&dev->sem)) {
> > +		if (pm_sleep_lock()) {
> > +			dev_warn(dev, "Illegal %s during suspend\n",
> > +				__FUNCTION__);
> > +			dump_stack();
> > +		} else {
> > +			pm_sleep_unlock();
> > +		}
> > +	} else {
> > +		up(&dev->sem);
> > +	}
> > +
> >  	if (parent)
> >  		klist_del(&dev->knode_parent);
> >  	if (MAJOR(dev->devt))
> 
> Bizarre, but it should work.

OK

Still, shouldn't we fail the removal of the device apart from giving the
warning?

> Be sure to include plenty of explanatory comments

I will.

I think that code can be moved to its own function in
drivers/base/power/main.c, btw.

> -- otherwise nobody will be able to figure it out!  :-) 

Well, I guess so. :-)

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 17:06                 ` Alan Stern
  2008-01-06 19:05                   ` Rafael J. Wysocki
@ 2008-01-06 19:05                   ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 19:05 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > If you can figure out a way to disable the warning in device_del() for 
> > > just the one device being unregistered by 
> > > device_pm_destroy_suspended(),
> > 
> > Something like this, perhaps:
> > 
> > @@ -905,6 +915,18 @@ void device_del(struct device * dev)
> >  	struct device * parent = dev->parent;
> >  	struct class_interface *class_intf;
> >  
> > +	if (down_trylock(&dev->sem)) {
> > +		if (pm_sleep_lock()) {
> > +			dev_warn(dev, "Illegal %s during suspend\n",
> > +				__FUNCTION__);
> > +			dump_stack();
> > +		} else {
> > +			pm_sleep_unlock();
> > +		}
> > +	} else {
> > +		up(&dev->sem);
> > +	}
> > +
> >  	if (parent)
> >  		klist_del(&dev->knode_parent);
> >  	if (MAJOR(dev->devt))
> 
> Bizarre, but it should work.

OK

Still, shouldn't we fail the removal of the device apart from giving the
warning?

> Be sure to include plenty of explanatory comments

I will.

I think that code can be moved to its own function in
drivers/base/power/main.c, btw.

> -- otherwise nobody will be able to figure it out!  :-) 

Well, I guess so. :-)

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 13:19               ` Rafael J. Wysocki
@ 2008-01-06 17:06                 ` Alan Stern
  2008-01-06 19:05                   ` Rafael J. Wysocki
  2008-01-06 19:05                   ` Rafael J. Wysocki
  2008-01-06 17:06                 ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 17:06 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > If you can figure out a way to disable the warning in device_del() for 
> > just the one device being unregistered by 
> > device_pm_destroy_suspended(),
> 
> Something like this, perhaps:
> 
> @@ -905,6 +915,18 @@ void device_del(struct device * dev)
>  	struct device * parent = dev->parent;
>  	struct class_interface *class_intf;
>  
> +	if (down_trylock(&dev->sem)) {
> +		if (pm_sleep_lock()) {
> +			dev_warn(dev, "Illegal %s during suspend\n",
> +				__FUNCTION__);
> +			dump_stack();
> +		} else {
> +			pm_sleep_unlock();
> +		}
> +	} else {
> +		up(&dev->sem);
> +	}
> +
>  	if (parent)
>  		klist_del(&dev->knode_parent);
>  	if (MAJOR(dev->devt))

Bizarre, but it should work.  Be sure to include plenty of explanatory 
comments -- otherwise nobody will be able to figure it out!  :-)

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06 13:19               ` Rafael J. Wysocki
  2008-01-06 17:06                 ` Alan Stern
@ 2008-01-06 17:06                 ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06 17:06 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sun, 6 Jan 2008, Rafael J. Wysocki wrote:

> > If you can figure out a way to disable the warning in device_del() for 
> > just the one device being unregistered by 
> > device_pm_destroy_suspended(),
> 
> Something like this, perhaps:
> 
> @@ -905,6 +915,18 @@ void device_del(struct device * dev)
>  	struct device * parent = dev->parent;
>  	struct class_interface *class_intf;
>  
> +	if (down_trylock(&dev->sem)) {
> +		if (pm_sleep_lock()) {
> +			dev_warn(dev, "Illegal %s during suspend\n",
> +				__FUNCTION__);
> +			dump_stack();
> +		} else {
> +			pm_sleep_unlock();
> +		}
> +	} else {
> +		up(&dev->sem);
> +	}
> +
>  	if (parent)
>  		klist_del(&dev->knode_parent);
>  	if (MAJOR(dev->devt))

Bizarre, but it should work.  Be sure to include plenty of explanatory 
comments -- otherwise nobody will be able to figure it out!  :-)

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06  4:04             ` Alan Stern
@ 2008-01-06 13:19               ` Rafael J. Wysocki
  2008-01-06 17:06                 ` Alan Stern
  2008-01-06 17:06                 ` Alan Stern
  2008-01-06 13:19               ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 13:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Saturday, 5 of January 2008, Alan Stern wrote:
> > > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > > Still, even doing that is not enough, since someone can call
> > > > destroy_suspended_device() from a .suspend() routine and then the device
> > > > will end up on a wrong list just as well.
> > > 
> > > That should never happen.  The whole idea of destroy_suspended_device()
> > > is that the device couldn't be resumed and in fact should be
> > > unregistered because it is no longer working or no longer present.  A
> > > suspend routine won't detect this sort of thing since it doesn't try to
> > > resume the device.
> > > 
> > > But it wouldn't hurt to mention in the kerneldoc that 
> > > destroy_suspended_device() is meant to be called only during a system 
> > > resume.
> > 
> > Hmm.  Please have a look at the appended patch.
> > 
> > I have removed the warning from device_del() and used list_empty() to detect
> > removed devices in the .suspend() routines.  Is that viable?
> 
> It's not good.
> 
> The warning in device_del() is vital.  It's what will tell people where
> the problem is when a deadlock occurs during system resume because some
> driver has mistakenly tried to unregister a device at the wrong time.  
> It would have pointed immediately to the msr driver in the case of the
> bug Andrew found, for instance.
> 
> If you can figure out a way to disable the warning in device_del() for 
> just the one device being unregistered by 
> device_pm_destroy_suspended(),

Something like this, perhaps:

@@ -905,6 +915,18 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	if (down_trylock(&dev->sem)) {
+		if (pm_sleep_lock()) {
+			dev_warn(dev, "Illegal %s during suspend\n",
+				__FUNCTION__);
+			dump_stack();
+		} else {
+			pm_sleep_unlock();
+		}
+	} else {
+		up(&dev->sem);
+	}
+
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))

> I suppose that would be okay. 

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-06  4:04             ` Alan Stern
  2008-01-06 13:19               ` Rafael J. Wysocki
@ 2008-01-06 13:19               ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-06 13:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sunday, 6 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Saturday, 5 of January 2008, Alan Stern wrote:
> > > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > > Still, even doing that is not enough, since someone can call
> > > > destroy_suspended_device() from a .suspend() routine and then the device
> > > > will end up on a wrong list just as well.
> > > 
> > > That should never happen.  The whole idea of destroy_suspended_device()
> > > is that the device couldn't be resumed and in fact should be
> > > unregistered because it is no longer working or no longer present.  A
> > > suspend routine won't detect this sort of thing since it doesn't try to
> > > resume the device.
> > > 
> > > But it wouldn't hurt to mention in the kerneldoc that 
> > > destroy_suspended_device() is meant to be called only during a system 
> > > resume.
> > 
> > Hmm.  Please have a look at the appended patch.
> > 
> > I have removed the warning from device_del() and used list_empty() to detect
> > removed devices in the .suspend() routines.  Is that viable?
> 
> It's not good.
> 
> The warning in device_del() is vital.  It's what will tell people where
> the problem is when a deadlock occurs during system resume because some
> driver has mistakenly tried to unregister a device at the wrong time.  
> It would have pointed immediately to the msr driver in the case of the
> bug Andrew found, for instance.
> 
> If you can figure out a way to disable the warning in device_del() for 
> just the one device being unregistered by 
> device_pm_destroy_suspended(),

Something like this, perhaps:

@@ -905,6 +915,18 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	if (down_trylock(&dev->sem)) {
+		if (pm_sleep_lock()) {
+			dev_warn(dev, "Illegal %s during suspend\n",
+				__FUNCTION__);
+			dump_stack();
+		} else {
+			pm_sleep_unlock();
+		}
+	} else {
+		up(&dev->sem);
+	}
+
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))

> I suppose that would be okay. 

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:58           ` Rafael J. Wysocki
  2008-01-06  4:04             ` Alan Stern
@ 2008-01-06  4:04             ` Alan Stern
  2008-01-06 13:19               ` Rafael J. Wysocki
  2008-01-06 13:19               ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06  4:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> On Saturday, 5 of January 2008, Alan Stern wrote:
> > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> > > Still, even doing that is not enough, since someone can call
> > > destroy_suspended_device() from a .suspend() routine and then the device
> > > will end up on a wrong list just as well.
> > 
> > That should never happen.  The whole idea of destroy_suspended_device()
> > is that the device couldn't be resumed and in fact should be
> > unregistered because it is no longer working or no longer present.  A
> > suspend routine won't detect this sort of thing since it doesn't try to
> > resume the device.
> > 
> > But it wouldn't hurt to mention in the kerneldoc that 
> > destroy_suspended_device() is meant to be called only during a system 
> > resume.
> 
> Hmm.  Please have a look at the appended patch.
> 
> I have removed the warning from device_del() and used list_empty() to detect
> removed devices in the .suspend() routines.  Is that viable?

It's not good.

The warning in device_del() is vital.  It's what will tell people where
the problem is when a deadlock occurs during system resume because some
driver has mistakenly tried to unregister a device at the wrong time.  
It would have pointed immediately to the msr driver in the case of the
bug Andrew found, for instance.

If you can figure out a way to disable the warning in device_del() for 
just the one device being unregistered by 
device_pm_destroy_suspended(), I suppose that would be okay.

Alan Stern




^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:58           ` Rafael J. Wysocki
@ 2008-01-06  4:04             ` Alan Stern
  2008-01-06  4:04             ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-06  4:04 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> On Saturday, 5 of January 2008, Alan Stern wrote:
> > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> > > Still, even doing that is not enough, since someone can call
> > > destroy_suspended_device() from a .suspend() routine and then the device
> > > will end up on a wrong list just as well.
> > 
> > That should never happen.  The whole idea of destroy_suspended_device()
> > is that the device couldn't be resumed and in fact should be
> > unregistered because it is no longer working or no longer present.  A
> > suspend routine won't detect this sort of thing since it doesn't try to
> > resume the device.
> > 
> > But it wouldn't hurt to mention in the kerneldoc that 
> > destroy_suspended_device() is meant to be called only during a system 
> > resume.
> 
> Hmm.  Please have a look at the appended patch.
> 
> I have removed the warning from device_del() and used list_empty() to detect
> removed devices in the .suspend() routines.  Is that viable?

It's not good.

The warning in device_del() is vital.  It's what will tell people where
the problem is when a deadlock occurs during system resume because some
driver has mistakenly tried to unregister a device at the wrong time.  
It would have pointed immediately to the msr driver in the case of the
bug Andrew found, for instance.

If you can figure out a way to disable the warning in device_del() for 
just the one device being unregistered by 
device_pm_destroy_suspended(), I suppose that would be okay.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:41         ` Alan Stern
@ 2008-01-05 21:58           ` Rafael J. Wysocki
  2008-01-06  4:04             ` Alan Stern
  2008-01-06  4:04             ` Alan Stern
  2008-01-05 21:58           ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 21:58 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Saturday, 5 of January 2008, Alan Stern wrote:
> > > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > Another thing to watch out for: Just in case somebody ends up calling
> > > > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > > > should interchange the resume_device() and the list_move_tail() 
> > > > > calls in dpm_resume().
> > > > 
> > > > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > > > that shouldn't be necessary, right?
> > > 
> > > It's still necessary, because destroy_suspended_device() still has to
> > > move the device from one list to another.  You don't want it to end up 
> > > on the dpm_locked list.
> > 
> > Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
> > someone calls destroy_suspended_device() from resume_device_early(dev).
> 
> Yes.
> 
> > Still, even doing that is not enough, since someone can call
> > destroy_suspended_device() from a .suspend() routine and then the device
> > will end up on a wrong list just as well.
> 
> That should never happen.  The whole idea of destroy_suspended_device()
> is that the device couldn't be resumed and in fact should be
> unregistered because it is no longer working or no longer present.  A
> suspend routine won't detect this sort of thing since it doesn't try to
> resume the device.
> 
> But it wouldn't hurt to mention in the kerneldoc that 
> destroy_suspended_device() is meant to be called only during a system 
> resume.

Hmm.  Please have a look at the appended patch.

I have removed the warning from device_del() and used list_empty() to detect
removed devices in the .suspend() routines.  Is that viable?

Rafael


---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   67 +++++-
 drivers/base/power/main.c  |  454 ++++++++++++++++++++++++++-------------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 354 insertions(+), 199 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
-		goto Error;
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
+		goto Done;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -1156,14 +1173,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1190,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
+
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
 
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call causes the PM core to release and unregister a suspended device
+ * created with a call to device_create() (devices cannot be unregistered
+ * directly while suspended, since the PM core holds their semaphores at that
+ * time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_destroy_suspended(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_destroy_suspended(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,17 +24,38 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
+static DECLARE_RWSEM(pm_sleep_rwsem);
+
 int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
 
@@ -53,29 +74,124 @@ void device_pm_remove(struct device *dev
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
+
+	/* Don't remove a device while the PM core has it locked for suspend */
+	down(&dev->sem);
 	mutex_lock(&dpm_list_mtx);
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+void device_pm_destroy_suspended(struct device *dev)
+{
+	pr_debug("PM: Removing suspended device %s:%s\n",
+		 dev->bus ? dev->bus->name : "No Bus",
+		 kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_del_init(&dev->power.entry);
+	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+	device_unregister(dev);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Interrupts must be disabled when calling this.
+ */
+static void dpm_power_up(void)
+{
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		list_move_tail(entry, &dpm_off);
+		resume_device_early(dev);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +208,68 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
 	TRACE_RESUME(error);
 	return error;
 }
 
-
-static int resume_device_early(struct device * dev)
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
+ */
+static void dpm_resume(void)
 {
-	int error = 0;
+	while(!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
+		resume_device(dev);
+		list_move_tail(entry, &dpm_locked);
 	}
-	TRACE_RESUME(error);
-	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	unlock_all_devices - Release each device's semaphore
+ *
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-static void dpm_resume(void)
+static void unlock_all_devices(void)
 {
 	mutex_lock(&dpm_list_mtx);
-	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
-
-		mutex_unlock(&dpm_list_mtx);
-		resume_device(dev);
-		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
-	}
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
  *	device_resume - Restore state of each device in system.
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
 void device_resume(void)
 {
 	might_sleep();
-	mutex_lock(&dpm_mtx);
 	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	unlock_all_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
 EXPORT_SYMBOL_GPL(device_resume);
 
 
-/**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
- *
- *	Interrupts must be disabled when calling this.
- */
-
-static void dpm_power_up(void)
-{
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
-	}
-}
-
-
-/**
- *	device_power_up - Turn on all devices that need special attention.
- *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
- */
-
-void device_power_up(void)
-{
-	sysdev_resume();
-	dpm_power_up();
-}
-
-EXPORT_SYMBOL_GPL(device_power_up);
-
-
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +280,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +289,70 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
  */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
 
-static int suspend_device(struct device * dev, pm_message_t state)
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
+
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
+
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +375,96 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
 	return error;
 }
 
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
+/**
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
+ *
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
+ *
+ *	(For historical reasons, if it returns -EAGAIN, that used to mean
+ *	that the device would be called again with interrupts disabled.
+ *	These days, we use the "suspend_late()" callback for that, so we
+ *	print a warning and consider it an error).
  */
-static int suspend_device_late(struct device *dev, pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not suspend device %s: "
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+ 			list_move(&dev->power.entry, &dpm_off);
 	}
+
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
- *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
- *
- *	(For historical reasons, if it returns -EAGAIN, that used to mean
- *	that the device would be called again with interrupts disabled.
- *	These days, we use the "suspend_late()" callback for that, so we
- *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
+ *	lock_all_devices - Acquire every device's semaphore
  *
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_suspend(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
-
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
-
-		error = suspend_device(dev, state);
-
+		down(&dev->sem);
 		mutex_lock(&dpm_list_mtx);
 
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
-			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
-
-	mutex_unlock(&dpm_mtx);
-	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	device_suspend - Save state and stop all devices in system.
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
  */
-
-int device_power_down(pm_message_t state)
+int device_suspend(pm_message_t state)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
-
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
-	}
+	int error;
 
-	error = sysdev_suspend(state);
- Done:
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:41         ` Alan Stern
  2008-01-05 21:58           ` Rafael J. Wysocki
@ 2008-01-05 21:58           ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 21:58 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > On Saturday, 5 of January 2008, Alan Stern wrote:
> > > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> > > 
> > > > > Another thing to watch out for: Just in case somebody ends up calling
> > > > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > > > should interchange the resume_device() and the list_move_tail() 
> > > > > calls in dpm_resume().
> > > > 
> > > > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > > > that shouldn't be necessary, right?
> > > 
> > > It's still necessary, because destroy_suspended_device() still has to
> > > move the device from one list to another.  You don't want it to end up 
> > > on the dpm_locked list.
> > 
> > Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
> > someone calls destroy_suspended_device() from resume_device_early(dev).
> 
> Yes.
> 
> > Still, even doing that is not enough, since someone can call
> > destroy_suspended_device() from a .suspend() routine and then the device
> > will end up on a wrong list just as well.
> 
> That should never happen.  The whole idea of destroy_suspended_device()
> is that the device couldn't be resumed and in fact should be
> unregistered because it is no longer working or no longer present.  A
> suspend routine won't detect this sort of thing since it doesn't try to
> resume the device.
> 
> But it wouldn't hurt to mention in the kerneldoc that 
> destroy_suspended_device() is meant to be called only during a system 
> resume.

Hmm.  Please have a look at the appended patch.

I have removed the warning from device_del() and used list_empty() to detect
removed devices in the .suspend() routines.  Is that viable?

Rafael


---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   67 +++++-
 drivers/base/power/main.c  |  454 ++++++++++++++++++++++++++-------------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 354 insertions(+), 199 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
-		goto Error;
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
+		goto Done;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -1156,14 +1173,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1190,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
+
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
 
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call causes the PM core to release and unregister a suspended device
+ * created with a call to device_create() (devices cannot be unregistered
+ * directly while suspended, since the PM core holds their semaphores at that
+ * time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_destroy_suspended(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_destroy_suspended(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,17 +24,38 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
+static DECLARE_RWSEM(pm_sleep_rwsem);
+
 int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
 
@@ -53,29 +74,124 @@ void device_pm_remove(struct device *dev
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
+
+	/* Don't remove a device while the PM core has it locked for suspend */
+	down(&dev->sem);
 	mutex_lock(&dpm_list_mtx);
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+void device_pm_destroy_suspended(struct device *dev)
+{
+	pr_debug("PM: Removing suspended device %s:%s\n",
+		 dev->bus ? dev->bus->name : "No Bus",
+		 kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_del_init(&dev->power.entry);
+	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+	device_unregister(dev);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Interrupts must be disabled when calling this.
+ */
+static void dpm_power_up(void)
+{
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		list_move_tail(entry, &dpm_off);
+		resume_device_early(dev);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +208,68 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
 	TRACE_RESUME(error);
 	return error;
 }
 
-
-static int resume_device_early(struct device * dev)
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
+ */
+static void dpm_resume(void)
 {
-	int error = 0;
+	while(!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
+		resume_device(dev);
+		list_move_tail(entry, &dpm_locked);
 	}
-	TRACE_RESUME(error);
-	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	unlock_all_devices - Release each device's semaphore
+ *
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-static void dpm_resume(void)
+static void unlock_all_devices(void)
 {
 	mutex_lock(&dpm_list_mtx);
-	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
-
-		mutex_unlock(&dpm_list_mtx);
-		resume_device(dev);
-		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
-	}
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
  *	device_resume - Restore state of each device in system.
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
 void device_resume(void)
 {
 	might_sleep();
-	mutex_lock(&dpm_mtx);
 	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	unlock_all_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
 EXPORT_SYMBOL_GPL(device_resume);
 
 
-/**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
- *
- *	Interrupts must be disabled when calling this.
- */
-
-static void dpm_power_up(void)
-{
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
-	}
-}
-
-
-/**
- *	device_power_up - Turn on all devices that need special attention.
- *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
- */
-
-void device_power_up(void)
-{
-	sysdev_resume();
-	dpm_power_up();
-}
-
-EXPORT_SYMBOL_GPL(device_power_up);
-
-
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +280,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +289,70 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
  */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
 
-static int suspend_device(struct device * dev, pm_message_t state)
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
+
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
+
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +375,96 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
 	return error;
 }
 
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
+/**
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
+ *
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
+ *
+ *	(For historical reasons, if it returns -EAGAIN, that used to mean
+ *	that the device would be called again with interrupts disabled.
+ *	These days, we use the "suspend_late()" callback for that, so we
+ *	print a warning and consider it an error).
  */
-static int suspend_device_late(struct device *dev, pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not suspend device %s: "
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+ 			list_move(&dev->power.entry, &dpm_off);
 	}
+
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
- *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
- *
- *	(For historical reasons, if it returns -EAGAIN, that used to mean
- *	that the device would be called again with interrupts disabled.
- *	These days, we use the "suspend_late()" callback for that, so we
- *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
+ *	lock_all_devices - Acquire every device's semaphore
  *
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_suspend(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
-
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
-
-		error = suspend_device(dev, state);
-
+		down(&dev->sem);
 		mutex_lock(&dpm_list_mtx);
 
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
-			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
-
-	mutex_unlock(&dpm_mtx);
-	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	device_suspend - Save state and stop all devices in system.
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
  */
-
-int device_power_down(pm_message_t state)
+int device_suspend(pm_message_t state)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
-
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
-	}
+	int error;
 
-	error = sysdev_suspend(state);
- Done:
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:13       ` Rafael J. Wysocki
@ 2008-01-05 21:41         ` Alan Stern
  2008-01-05 21:58           ` Rafael J. Wysocki
  2008-01-05 21:58           ` Rafael J. Wysocki
  2008-01-05 21:41         ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 21:41 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> On Saturday, 5 of January 2008, Alan Stern wrote:
> > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > Another thing to watch out for: Just in case somebody ends up calling
> > > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > > should interchange the resume_device() and the list_move_tail() 
> > > > calls in dpm_resume().
> > > 
> > > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > > that shouldn't be necessary, right?
> > 
> > It's still necessary, because destroy_suspended_device() still has to
> > move the device from one list to another.  You don't want it to end up 
> > on the dpm_locked list.
> 
> Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
> someone calls destroy_suspended_device() from resume_device_early(dev).

Yes.

> Still, even doing that is not enough, since someone can call
> destroy_suspended_device() from a .suspend() routine and then the device
> will end up on a wrong list just as well.

That should never happen.  The whole idea of destroy_suspended_device()
is that the device couldn't be resumed and in fact should be
unregistered because it is no longer working or no longer present.  A
suspend routine won't detect this sort of thing since it doesn't try to
resume the device.

But it wouldn't hurt to mention in the kerneldoc that 
destroy_suspended_device() is meant to be called only during a system 
resume.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 21:13       ` Rafael J. Wysocki
  2008-01-05 21:41         ` Alan Stern
@ 2008-01-05 21:41         ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 21:41 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> On Saturday, 5 of January 2008, Alan Stern wrote:
> > On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> > 
> > > > Another thing to watch out for: Just in case somebody ends up calling
> > > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > > should interchange the resume_device() and the list_move_tail() 
> > > > calls in dpm_resume().
> > > 
> > > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > > that shouldn't be necessary, right?
> > 
> > It's still necessary, because destroy_suspended_device() still has to
> > move the device from one list to another.  You don't want it to end up 
> > on the dpm_locked list.
> 
> Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
> someone calls destroy_suspended_device() from resume_device_early(dev).

Yes.

> Still, even doing that is not enough, since someone can call
> destroy_suspended_device() from a .suspend() routine and then the device
> will end up on a wrong list just as well.

That should never happen.  The whole idea of destroy_suspended_device()
is that the device couldn't be resumed and in fact should be
unregistered because it is no longer working or no longer present.  A
suspend routine won't detect this sort of thing since it doesn't try to
resume the device.

But it wouldn't hurt to mention in the kerneldoc that 
destroy_suspended_device() is meant to be called only during a system 
resume.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:39     ` Alan Stern
  2008-01-05 21:13       ` Rafael J. Wysocki
@ 2008-01-05 21:13       ` Rafael J. Wysocki
  2008-01-05 21:41         ` Alan Stern
  2008-01-05 21:41         ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 21:13 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > Another thing to watch out for: Just in case somebody ends up calling
> > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > should interchange the resume_device() and the list_move_tail() 
> > > calls in dpm_resume().
> > 
> > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > that shouldn't be necessary, right?
> 
> It's still necessary, because destroy_suspended_device() still has to
> move the device from one list to another.  You don't want it to end up 
> on the dpm_locked list.

Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
someone calls destroy_suspended_device() from resume_device_early(dev).

Still, even doing that is not enough, since someone can call
destroy_suspended_device() from a .suspend() routine and then the device
will end up on a wrong list just as well.

Greetings,
Rafael



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:39     ` Alan Stern
@ 2008-01-05 21:13       ` Rafael J. Wysocki
  2008-01-05 21:13       ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 21:13 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > > Another thing to watch out for: Just in case somebody ends up calling
> > > destroy_suspended_device(dev) from within dev's own resume method, you 
> > > should interchange the resume_device() and the list_move_tail() 
> > > calls in dpm_resume().
> > 
> > However, if we unregister them all at once after releasing pm_sleep_rwsem,
> > that shouldn't be necessary, right?
> 
> It's still necessary, because destroy_suspended_device() still has to
> move the device from one list to another.  You don't want it to end up 
> on the dpm_locked list.

Hmm.  That means we'd have to do the same thing in dpm_power_up() in case
someone calls destroy_suspended_device() from resume_device_early(dev).

Still, even doing that is not enough, since someone can call
destroy_suspended_device() from a .suspend() routine and then the device
will end up on a wrong list just as well.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:19   ` Rafael J. Wysocki
@ 2008-01-05 20:39     ` Alan Stern
  2008-01-05 21:13       ` Rafael J. Wysocki
  2008-01-05 21:13       ` Rafael J. Wysocki
  2008-01-05 20:39     ` Alan Stern
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 20:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> > Another thing to watch out for: Just in case somebody ends up calling
> > destroy_suspended_device(dev) from within dev's own resume method, you 
> > should interchange the resume_device() and the list_move_tail() 
> > calls in dpm_resume().
> 
> However, if we unregister them all at once after releasing pm_sleep_rwsem,
> that shouldn't be necessary, right?

It's still necessary, because destroy_suspended_device() still has to
move the device from one list to another.  You don't want it to end up 
on the dpm_locked list.

Alan Stern


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:19   ` Rafael J. Wysocki
  2008-01-05 20:39     ` Alan Stern
@ 2008-01-05 20:39     ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 20:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> > Another thing to watch out for: Just in case somebody ends up calling
> > destroy_suspended_device(dev) from within dev's own resume method, you 
> > should interchange the resume_device() and the list_move_tail() 
> > calls in dpm_resume().
> 
> However, if we unregister them all at once after releasing pm_sleep_rwsem,
> that shouldn't be necessary, right?

It's still necessary, because destroy_suspended_device() still has to
move the device from one list to another.  You don't want it to end up 
on the dpm_locked list.

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:08 ` Alan Stern
@ 2008-01-05 20:19   ` Rafael J. Wysocki
  2008-01-05 20:39     ` Alan Stern
  2008-01-05 20:39     ` Alan Stern
  2008-01-05 20:19   ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 20:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Greg, Andrew,
> > 
> > The appended patch is a replacement for
> > gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
> > suspend and hibernation on some systems.
> > 
> > Please consider for applying.
> 
> This warning message:
> 
> > @@ -905,6 +915,13 @@ void device_del(struct device * dev)
> >  	struct device * parent = dev->parent;
> >  	struct class_interface *class_intf;
> >  
> > +	if (pm_sleep_lock()) {
> > +		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
> > +		dump_stack();
> > +	} else {
> 
> will unavoidably be triggered by this code:

Ah, my fault, sorry.

> > +void device_pm_destroy_suspended(struct device *dev)
> > +{
> > +	pr_debug("PM: Removing suspended device %s:%s\n",
> > +		 dev->bus ? dev->bus->name : "No Bus",
> > +		 kobject_name(&dev->kobj));
> > +	mutex_lock(&dpm_list_mtx);
> > +	list_del_init(&dev->power.entry);
> > +	mutex_unlock(&dpm_list_mtx);
> > +	up(&dev->sem);
> > +	device_unregister(dev);
> > +}
> 
> since the call to device_del() will occur while the pm_sleep_rwsem is
> still locked for writing.  That's why I suggested not unregistering
> these devices until after everything else has been resumed and the
> rwsem has been dropped.

Hmm, well.  I'll go back to the previous version, then.  Sorry for the mess.

> Another thing to watch out for: Just in case somebody ends up calling
> destroy_suspended_device(dev) from within dev's own resume method, you 
> should interchange the resume_device() and the list_move_tail() 
> calls in dpm_resume().

However, if we unregister them all at once after releasing pm_sleep_rwsem,
that shouldn't be necessary, right?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 20:08 ` Alan Stern
  2008-01-05 20:19   ` Rafael J. Wysocki
@ 2008-01-05 20:19   ` Rafael J. Wysocki
  1 sibling, 0 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 20:19 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Saturday, 5 of January 2008, Alan Stern wrote:
> On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:
> 
> > Greg, Andrew,
> > 
> > The appended patch is a replacement for
> > gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
> > suspend and hibernation on some systems.
> > 
> > Please consider for applying.
> 
> This warning message:
> 
> > @@ -905,6 +915,13 @@ void device_del(struct device * dev)
> >  	struct device * parent = dev->parent;
> >  	struct class_interface *class_intf;
> >  
> > +	if (pm_sleep_lock()) {
> > +		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
> > +		dump_stack();
> > +	} else {
> 
> will unavoidably be triggered by this code:

Ah, my fault, sorry.

> > +void device_pm_destroy_suspended(struct device *dev)
> > +{
> > +	pr_debug("PM: Removing suspended device %s:%s\n",
> > +		 dev->bus ? dev->bus->name : "No Bus",
> > +		 kobject_name(&dev->kobj));
> > +	mutex_lock(&dpm_list_mtx);
> > +	list_del_init(&dev->power.entry);
> > +	mutex_unlock(&dpm_list_mtx);
> > +	up(&dev->sem);
> > +	device_unregister(dev);
> > +}
> 
> since the call to device_del() will occur while the pm_sleep_rwsem is
> still locked for writing.  That's why I suggested not unregistering
> these devices until after everything else has been resumed and the
> rwsem has been dropped.

Hmm, well.  I'll go back to the previous version, then.  Sorry for the mess.

> Another thing to watch out for: Just in case somebody ends up calling
> destroy_suspended_device(dev) from within dev's own resume method, you 
> should interchange the resume_device() and the list_move_tail() 
> calls in dpm_resume().

However, if we unregister them all at once after releasing pm_sleep_rwsem,
that shouldn't be necessary, right?

Rafael

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 18:36 Rafael J. Wysocki
  2008-01-05 20:08 ` Alan Stern
@ 2008-01-05 20:08 ` Alan Stern
  2008-01-05 20:19   ` Rafael J. Wysocki
  2008-01-05 20:19   ` Rafael J. Wysocki
  1 sibling, 2 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 20:08 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, Andrew Morton, Len Brown, Ingo Molnar,
	ACPI Devel Maling List, LKML, pm list

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> Greg, Andrew,
> 
> The appended patch is a replacement for
> gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
> suspend and hibernation on some systems.
> 
> Please consider for applying.

This warning message:

> @@ -905,6 +915,13 @@ void device_del(struct device * dev)
>  	struct device * parent = dev->parent;
>  	struct class_interface *class_intf;
>  
> +	if (pm_sleep_lock()) {
> +		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
> +		dump_stack();
> +	} else {

will unavoidably be triggered by this code:

> +void device_pm_destroy_suspended(struct device *dev)
> +{
> +	pr_debug("PM: Removing suspended device %s:%s\n",
> +		 dev->bus ? dev->bus->name : "No Bus",
> +		 kobject_name(&dev->kobj));
> +	mutex_lock(&dpm_list_mtx);
> +	list_del_init(&dev->power.entry);
> +	mutex_unlock(&dpm_list_mtx);
> +	up(&dev->sem);
> +	device_unregister(dev);
> +}

since the call to device_del() will occur while the pm_sleep_rwsem is
still locked for writing.  That's why I suggested not unregistering
these devices until after everything else has been resumed and the
rwsem has been dropped.

Another thing to watch out for: Just in case somebody ends up calling
destroy_suspended_device(dev) from within dev's own resume method, you 
should interchange the resume_device() and the list_move_tail() 
calls in dpm_resume().

Alan Stern




^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH] PM: Acquire device locks on suspend
  2008-01-05 18:36 Rafael J. Wysocki
@ 2008-01-05 20:08 ` Alan Stern
  2008-01-05 20:08 ` Alan Stern
  1 sibling, 0 replies; 70+ messages in thread
From: Alan Stern @ 2008-01-05 20:08 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Greg KH, LKML, ACPI Devel Maling List, pm list, Andrew Morton,
	Ingo Molnar

On Sat, 5 Jan 2008, Rafael J. Wysocki wrote:

> Greg, Andrew,
> 
> The appended patch is a replacement for
> gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
> suspend and hibernation on some systems.
> 
> Please consider for applying.

This warning message:

> @@ -905,6 +915,13 @@ void device_del(struct device * dev)
>  	struct device * parent = dev->parent;
>  	struct class_interface *class_intf;
>  
> +	if (pm_sleep_lock()) {
> +		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
> +		dump_stack();
> +	} else {

will unavoidably be triggered by this code:

> +void device_pm_destroy_suspended(struct device *dev)
> +{
> +	pr_debug("PM: Removing suspended device %s:%s\n",
> +		 dev->bus ? dev->bus->name : "No Bus",
> +		 kobject_name(&dev->kobj));
> +	mutex_lock(&dpm_list_mtx);
> +	list_del_init(&dev->power.entry);
> +	mutex_unlock(&dpm_list_mtx);
> +	up(&dev->sem);
> +	device_unregister(dev);
> +}

since the call to device_del() will occur while the pm_sleep_rwsem is
still locked for writing.  That's why I suggested not unregistering
these devices until after everything else has been resumed and the
rwsem has been dropped.

Another thing to watch out for: Just in case somebody ends up calling
destroy_suspended_device(dev) from within dev's own resume method, you 
should interchange the resume_device() and the list_move_tail() 
calls in dpm_resume().

Alan Stern

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH] PM: Acquire device locks on suspend
@ 2008-01-05 18:36 Rafael J. Wysocki
  2008-01-05 20:08 ` Alan Stern
  2008-01-05 20:08 ` Alan Stern
  0 siblings, 2 replies; 70+ messages in thread
From: Rafael J. Wysocki @ 2008-01-05 18:36 UTC (permalink / raw)
  To: Greg KH, Andrew Morton
  Cc: Alan Stern, Len Brown, Ingo Molnar, ACPI Devel Maling List, LKML,
	pm list

Greg, Andrew,

The appended patch is a replacement for
gregkh-driver-pm-acquire-device-locks-prior-to-suspending.patch that deadlocked
suspend and hibernation on some systems.

Please consider for applying.

Thanks,
Rafael

---
From: Alan Stern <stern@rowland.harvard.edu>, Rafael J. Wysocki <rjw@sisk.pl>

This patch reorganizes the way suspend and resume notifications are
sent to drivers.  The major changes are that now the PM core acquires
every device semaphore before calling the methods, and calls to
device_add() during suspends will fail.

It also provides a way to safely remove a suspended device with the help of
the PM core, by using the destroy_suspended_device() callback introduced
specifically for this purpose, and updates two drivers (msr and cpuid) that need
to do that.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/cpuid.c    |    6 
 arch/x86/kernel/msr.c      |    6 
 drivers/base/core.c        |   67 +++++-
 drivers/base/power/main.c  |  452 ++++++++++++++++++++++++++-------------------
 drivers/base/power/power.h |   12 +
 include/linux/device.h     |    8 
 6 files changed, 352 insertions(+), 199 deletions(-)

Index: linux-2.6/drivers/base/core.c
===================================================================
--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -726,11 +726,20 @@ int device_add(struct device *dev)
 {
 	struct device *parent = NULL;
 	struct class_interface *class_intf;
-	int error = -EINVAL;
+	int error;
+
+	error = pm_sleep_lock();
+	if (error) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+		return error;
+	}
 
 	dev = get_device(dev);
-	if (!dev || !strlen(dev->bus_id))
-		goto Error;
+	if (!dev || !strlen(dev->bus_id)) {
+		error = -EINVAL;
+		goto Done;
+	}
 
 	pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id);
 
@@ -795,6 +804,7 @@ int device_add(struct device *dev)
 	}
  Done:
 	put_device(dev);
+	pm_sleep_unlock();
 	return error;
  BusError:
 	device_pm_remove(dev);
@@ -905,6 +915,13 @@ void device_del(struct device * dev)
 	struct device * parent = dev->parent;
 	struct class_interface *class_intf;
 
+	if (pm_sleep_lock()) {
+		dev_warn(dev, "Illegal %s during suspend\n", __FUNCTION__);
+		dump_stack();
+	} else {
+		pm_sleep_unlock();
+	}
+
 	if (parent)
 		klist_del(&dev->knode_parent);
 	if (MAJOR(dev->devt))
@@ -1156,14 +1173,11 @@ error:
 EXPORT_SYMBOL_GPL(device_create);
 
 /**
- * device_destroy - removes a device that was created with device_create()
+ * find_device - finds a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
  * @devt: the dev_t of the device that was previously registered
- *
- * This call unregisters and cleans up a device that was created with a
- * call to device_create().
  */
-void device_destroy(struct class *class, dev_t devt)
+static struct device *find_device(struct class *class, dev_t devt)
 {
 	struct device *dev = NULL;
 	struct device *dev_tmp;
@@ -1176,12 +1190,49 @@ void device_destroy(struct class *class,
 		}
 	}
 	up(&class->sem);
+	return dev;
+}
+
+/**
+ * device_destroy - removes a device that was created with device_create()
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call unregisters and cleans up a device that was created with a
+ * call to device_create().
+ */
+void device_destroy(struct class *class, dev_t devt)
+{
+	struct device *dev;
 
+	dev = find_device(class, devt);
 	if (dev)
 		device_unregister(dev);
 }
 EXPORT_SYMBOL_GPL(device_destroy);
 
+#ifdef CONFIG_PM_SLEEP
+/**
+ * destroy_suspended_device - asks the PM core to remove a suspended device
+ * @class: pointer to the struct class that this device was registered with
+ * @devt: the dev_t of the device that was previously registered
+ *
+ * This call causes the PM core to release and unregister a suspended device
+ * created with a call to device_create() (devices cannot be unregistered
+ * directly while suspended, since the PM core holds their semaphores at that
+ * time).
+ */
+void destroy_suspended_device(struct class *class, dev_t devt)
+{
+	struct device *dev;
+
+	dev = find_device(class, devt);
+	if (dev)
+		device_pm_destroy_suspended(dev);
+}
+EXPORT_SYMBOL_GPL(destroy_suspended_device);
+#endif /* CONFIG_PM_SLEEP */
+
 /**
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
Index: linux-2.6/include/linux/device.h
===================================================================
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -521,6 +521,14 @@ extern struct device *device_create(stru
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
+#ifdef CONFIG_PM_SLEEP
+extern void destroy_suspended_device(struct class *cls, dev_t devt);
+#else /* !CONFIG_PM_SLEEP */
+static inline void destroy_suspended_device(struct class *cls, dev_t devt)
+{
+	device_destroy(cls, devt);
+}
+#endif /* !CONFIG_PM_SLEEP */
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
Index: linux-2.6/drivers/base/power/power.h
===================================================================
--- linux-2.6.orig/drivers/base/power/power.h
+++ linux-2.6/drivers/base/power/power.h
@@ -20,6 +20,9 @@ static inline struct device *to_device(s
 
 extern void device_pm_add(struct device *);
 extern void device_pm_remove(struct device *);
+extern void device_pm_destroy_suspended(struct device *);
+extern int pm_sleep_lock(void);
+extern void pm_sleep_unlock(void);
 
 #else /* CONFIG_PM_SLEEP */
 
@@ -32,6 +35,15 @@ static inline void device_pm_remove(stru
 {
 }
 
+static inline int pm_sleep_lock(void)
+{
+	return 0;
+}
+
+static inline void pm_sleep_unlock(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PM
Index: linux-2.6/drivers/base/power/main.c
===================================================================
--- linux-2.6.orig/drivers/base/power/main.c
+++ linux-2.6/drivers/base/power/main.c
@@ -24,17 +24,38 @@
 #include <linux/mutex.h>
 #include <linux/pm.h>
 #include <linux/resume-trace.h>
+#include <linux/rwsem.h>
 
 #include "../base.h"
 #include "power.h"
 
+/*
+ * The entries in the dpm_active list are in a depth first order, simply
+ * because children are guaranteed to be discovered after parents, and
+ * are inserted at the back of the list on discovery.
+ *
+ * All the other lists are kept in the same order, for consistency.
+ * However the lists aren't always traversed in the same order.
+ * Semaphores must be acquired from the top (i.e., front) down
+ * and released in the opposite order.  Devices must be suspended
+ * from the bottom (i.e., end) up and resumed in the opposite order.
+ * That way no parent will be suspended while it still has an active
+ * child.
+ *
+ * Since device_pm_add() may be called with a device semaphore held,
+ * we must never try to acquire a device semaphore while holding
+ * dpm_list_mtx.
+ */
+
 LIST_HEAD(dpm_active);
+static LIST_HEAD(dpm_locked);
 static LIST_HEAD(dpm_off);
 static LIST_HEAD(dpm_off_irq);
 
-static DEFINE_MUTEX(dpm_mtx);
 static DEFINE_MUTEX(dpm_list_mtx);
 
+static DECLARE_RWSEM(pm_sleep_rwsem);
+
 int (*platform_enable_wakeup)(struct device *dev, int is_on);
 
 
@@ -53,29 +74,124 @@ void device_pm_remove(struct device *dev
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
+
+	/* Don't remove a device while the PM core has it locked for suspend */
+	down(&dev->sem);
 	mutex_lock(&dpm_list_mtx);
 	dpm_sysfs_remove(dev);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+}
+
+void device_pm_destroy_suspended(struct device *dev)
+{
+	pr_debug("PM: Removing suspended device %s:%s\n",
+		 dev->bus ? dev->bus->name : "No Bus",
+		 kobject_name(&dev->kobj));
+	mutex_lock(&dpm_list_mtx);
+	list_del_init(&dev->power.entry);
+	mutex_unlock(&dpm_list_mtx);
+	up(&dev->sem);
+	device_unregister(dev);
+}
+
+/**
+ *	pm_sleep_lock - mutual exclusion for registration and suspend
+ *
+ *	Returns 0 if no suspend is underway and device registration
+ *	may proceed, otherwise -EBUSY.
+ */
+int pm_sleep_lock(void)
+{
+	if (down_read_trylock(&pm_sleep_rwsem))
+		return 0;
+	return -EBUSY;
+}
+
+/**
+ *	pm_sleep_unlock - mutual exclusion for registration and suspend
+ *
+ *	This routine undoes the effect of a successful pm_sleep_lock()
+ *	when a device's registration is complete.
+ */
+void pm_sleep_unlock(void)
+{
+	up_read(&pm_sleep_rwsem);
 }
 
 
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device - Restore state for one device.
+ *	resume_device_early - Power on one device (early resume).
  *	@dev:	Device.
  *
+ *	Must be called with interrupts disabled.
  */
-
-static int resume_device(struct device * dev)
+static int resume_device_early(struct device *dev)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	down(&dev->sem);
+	if (dev->bus && dev->bus->resume_early) {
+		dev_dbg(dev,"EARLY resume\n");
+		error = dev->bus->resume_early(dev);
+	}
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *
+ *	Walk the dpm_off_irq list and power each device up. This
+ *	is used for devices that required they be powered down with
+ *	interrupts disabled. As devices are powered on, they are moved
+ *	to the dpm_off list.
+ *
+ *	Interrupts must be disabled when calling this.
+ */
+static void dpm_power_up(void)
+{
+	while (!list_empty(&dpm_off_irq)) {
+		struct list_head *entry = dpm_off_irq.next;
+		struct device *dev = to_device(entry);
+
+		resume_device_early(dev);
+		list_move_tail(entry, &dpm_off);
+	}
+}
+
+/**
+ *	device_power_up - Turn on all devices that need special attention.
+ *
+ *	Power on system devices, then devices that required we shut them down
+ *	with interrupts disabled.
+ *
+ *	Must be called with interrupts disabled.
+ */
+void device_power_up(void)
+{
+	sysdev_resume();
+	dpm_power_up();
+}
+EXPORT_SYMBOL_GPL(device_power_up);
+
+/**
+ *	resume_device - Restore state for one device.
+ *	@dev:	Device.
+ *
+ */
+static int resume_device(struct device *dev)
+{
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
 
 	if (dev->bus && dev->bus->resume) {
 		dev_dbg(dev,"resuming\n");
@@ -92,126 +208,68 @@ static int resume_device(struct device *
 		error = dev->class->resume(dev);
 	}
 
-	up(&dev->sem);
-
 	TRACE_RESUME(error);
 	return error;
 }
 
-
-static int resume_device_early(struct device * dev)
+/**
+ *	dpm_resume - Resume every device.
+ *
+ *	Resume the devices that have either not gone through
+ *	the late suspend, or that did go through it but also
+ *	went through the early resume.
+ *
+ *	Take devices from the dpm_off list, resume them,
+ *	and put them on the dpm_locked list.
+ */
+static void dpm_resume(void)
 {
-	int error = 0;
+	while(!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.next;
+		struct device *dev = to_device(entry);
 
-	TRACE_DEVICE(dev);
-	TRACE_RESUME(0);
-	if (dev->bus && dev->bus->resume_early) {
-		dev_dbg(dev,"EARLY resume\n");
-		error = dev->bus->resume_early(dev);
+		resume_device(dev);
+		list_move_tail(entry, &dpm_locked);
 	}
-	TRACE_RESUME(error);
-	return error;
 }
 
-/*
- * Resume the devices that have either not gone through
- * the late suspend, or that did go through it but also
- * went through the early resume
+/**
+ *	unlock_all_devices - Release each device's semaphore
+ *
+ *	Go through the dpm_locked list.  Put each device on the dpm_active
+ *	list and unlock it.
  */
-static void dpm_resume(void)
+static void unlock_all_devices(void)
 {
 	mutex_lock(&dpm_list_mtx);
-	while(!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.next;
-		struct device * dev = to_device(entry);
-
-		get_device(dev);
-		list_move_tail(entry, &dpm_active);
-
-		mutex_unlock(&dpm_list_mtx);
-		resume_device(dev);
-		mutex_lock(&dpm_list_mtx);
-		put_device(dev);
-	}
+ 	while (!list_empty(&dpm_locked)) {
+ 		struct list_head *entry = dpm_locked.prev;
+ 		struct device *dev = to_device(entry);
+
+ 		list_move(entry, &dpm_active);
+ 		up(&dev->sem);
+ 	}
 	mutex_unlock(&dpm_list_mtx);
 }
 
-
 /**
  *	device_resume - Restore state of each device in system.
  *
- *	Walk the dpm_off list, remove each entry, resume the device,
- *	then add it to the dpm_active list.
+ *	Resume all the devices, unlock them all, and allow new
+ *	devices to be registered once again.
  */
-
 void device_resume(void)
 {
 	might_sleep();
-	mutex_lock(&dpm_mtx);
 	dpm_resume();
-	mutex_unlock(&dpm_mtx);
+	unlock_all_devices();
+	up_write(&pm_sleep_rwsem);
 }
-
 EXPORT_SYMBOL_GPL(device_resume);
 
 
-/**
- *	dpm_power_up - Power on some devices.
- *
- *	Walk the dpm_off_irq list and power each device up. This
- *	is used for devices that required they be powered down with
- *	interrupts disabled. As devices are powered on, they are moved
- *	to the dpm_active list.
- *
- *	Interrupts must be disabled when calling this.
- */
-
-static void dpm_power_up(void)
-{
-	while(!list_empty(&dpm_off_irq)) {
-		struct list_head * entry = dpm_off_irq.next;
-		struct device * dev = to_device(entry);
-
-		list_move_tail(entry, &dpm_off);
-		resume_device_early(dev);
-	}
-}
-
-
-/**
- *	device_power_up - Turn on all devices that need special attention.
- *
- *	Power on system devices then devices that required we shut them down
- *	with interrupts disabled.
- *	Called with interrupts disabled.
- */
-
-void device_power_up(void)
-{
-	sysdev_resume();
-	dpm_power_up();
-}
-
-EXPORT_SYMBOL_GPL(device_power_up);
-
-
 /*------------------------- Suspend routines -------------------------*/
 
-/*
- * The entries in the dpm_active list are in a depth first order, simply
- * because children are guaranteed to be discovered after parents, and
- * are inserted at the back of the list on discovery.
- *
- * All list on the suspend path are done in reverse order, so we operate
- * on the leaves of the device tree (or forests, depending on how you want
- * to look at it ;) first. As nodes are removed from the back of the list,
- * they are inserted into the front of their destintation lists.
- *
- * Things are the reverse on the resume path - iterations are done in
- * forward order, and nodes are inserted at the back of their destination
- * lists. This way, the ancestors will be accessed before their descendents.
- */
-
 static inline char *suspend_verb(u32 event)
 {
 	switch (event) {
@@ -222,7 +280,6 @@ static inline char *suspend_verb(u32 eve
 	}
 }
 
-
 static void
 suspend_device_dbg(struct device *dev, pm_message_t state, char *info)
 {
@@ -232,16 +289,69 @@ suspend_device_dbg(struct device *dev, p
 }
 
 /**
- *	suspend_device - Save state of one device.
+ *	suspend_device_late - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state:	Power state device is entering.
+ *
+ *	This is called with interrupts off and only a single CPU running.
  */
+static int suspend_device_late(struct device *dev, pm_message_t state)
+{
+	int error = 0;
+
+	if (dev->bus && dev->bus->suspend_late) {
+		suspend_device_dbg(dev, state, "LATE ");
+		error = dev->bus->suspend_late(dev, state);
+		suspend_report_result(dev->bus->suspend_late, error);
+	}
+	return error;
+}
 
-static int suspend_device(struct device * dev, pm_message_t state)
+/**
+ *	device_power_down - Shut down special devices.
+ *	@state:		Power state to enter.
+ *
+ *	Power down devices that require interrupts to be disabled
+ *	and move them from the dpm_off list to the dpm_off_irq list.
+ *	Then power down system devices.
+ *
+ *	Must be called with interrupts disabled and only one CPU running.
+ */
+int device_power_down(pm_message_t state)
+{
+	int error = 0;
+
+	while (!list_empty(&dpm_off)) {
+		struct list_head *entry = dpm_off.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device_late(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not power down device %s: "
+					"error %d\n",
+					kobject_name(&dev->kobj), error);
+			break;
+		}
+		list_move(&dev->power.entry, &dpm_off_irq);
+	}
+
+	if (!error)
+		error = sysdev_suspend(state);
+	if (error)
+		dpm_power_up();
+	return error;
+}
+EXPORT_SYMBOL_GPL(device_power_down);
+
+/**
+ *	suspend_device - Save state of one device.
+ *	@dev:	Device.
+ *	@state:	Power state device is entering.
+ */
+int suspend_device(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
 	if (dev->power.power_state.event) {
 		dev_dbg(dev, "PM: suspend %d-->%d\n",
 			dev->power.power_state.event, state.event);
@@ -264,123 +374,95 @@ static int suspend_device(struct device 
 		error = dev->bus->suspend(dev, state);
 		suspend_report_result(dev->bus->suspend, error);
 	}
-	up(&dev->sem);
 	return error;
 }
 
-
-/*
- * This is called with interrupts off, only a single CPU
- * running. We can't acquire a mutex or semaphore (and we don't
- * need the protection)
+/**
+ *	dpm_suspend - Suspend every device.
+ *	@state:	Power state to put each device in.
+ *
+ *	Walk the dpm_locked list.  Suspend each device and move it
+ *	to the dpm_off list.
+ *
+ *	(For historical reasons, if it returns -EAGAIN, that used to mean
+ *	that the device would be called again with interrupts disabled.
+ *	These days, we use the "suspend_late()" callback for that, so we
+ *	print a warning and consider it an error).
  */
-static int suspend_device_late(struct device *dev, pm_message_t state)
+static int dpm_suspend(pm_message_t state)
 {
 	int error = 0;
 
-	if (dev->bus && dev->bus->suspend_late) {
-		suspend_device_dbg(dev, state, "LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
+	while (!list_empty(&dpm_locked)) {
+		struct list_head *entry = dpm_locked.prev;
+		struct device *dev = to_device(entry);
+
+		error = suspend_device(dev, state);
+		if (error) {
+			printk(KERN_ERR "Could not suspend device %s: "
+					"error %d%s\n",
+					kobject_name(&dev->kobj),
+					error,
+					(error == -EAGAIN ?
+					" (please convert to suspend_late)" :
+					""));
+			break;
+		}
+ 		list_move(&dev->power.entry, &dpm_off);
 	}
+
 	return error;
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
- *	@state:		Power state to put each device in.
- *
- *	Walk the dpm_active list, call ->suspend() for each device, and move
- *	it to the dpm_off list.
- *
- *	(For historical reasons, if it returns -EAGAIN, that used to mean
- *	that the device would be called again with interrupts disabled.
- *	These days, we use the "suspend_late()" callback for that, so we
- *	print a warning and consider it an error).
- *
- *	If we get a different error, try and back out.
- *
- *	If we hit a failure with any of the devices, call device_resume()
- *	above to bring the suspended devices back to life.
+ *	lock_all_devices - Acquire every device's semaphore
  *
+ *	Go through the dpm_active list. Carefully lock each device's
+ *	semaphore and put it on the dpm_locked list.
  */
-
-int device_suspend(pm_message_t state)
+static void lock_all_devices(void)
 {
-	int error = 0;
-
-	might_sleep();
-	mutex_lock(&dpm_mtx);
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_active) && error == 0) {
-		struct list_head * entry = dpm_active.prev;
-		struct device * dev = to_device(entry);
-
+	while (!list_empty(&dpm_active)) {
+		struct list_head *entry = dpm_active.next;
+		struct device *dev = to_device(entry);
+
+		/* Required locking order is dev->sem first,
+		 * then dpm_list_mtx.  Hence this awkward code.
+		 */
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
-
-		error = suspend_device(dev, state);
-
+		down(&dev->sem);
 		mutex_lock(&dpm_list_mtx);
 
-		/* Check if the device got removed */
-		if (!list_empty(&dev->power.entry)) {
-			/* Move it to the dpm_off list */
-			if (!error)
-				list_move(&dev->power.entry, &dpm_off);
-		}
-		if (error)
-			printk(KERN_ERR "Could not suspend device %s: "
-				"error %d%s\n",
-				kobject_name(&dev->kobj), error,
-				error == -EAGAIN ? " (please convert to suspend_late)" : "");
+		if (list_empty(entry))
+			up(&dev->sem);		/* Device was removed */
+		else
+			list_move_tail(entry, &dpm_locked);
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
-	if (error)
-		dpm_resume();
-
-	mutex_unlock(&dpm_mtx);
-	return error;
 }
 
-EXPORT_SYMBOL_GPL(device_suspend);
-
 /**
- *	device_power_down - Shut down special devices.
- *	@state:		Power state to enter.
+ *	device_suspend - Save state and stop all devices in system.
  *
- *	Walk the dpm_off_irq list, calling ->power_down() for each device that
- *	couldn't power down the device with interrupts enabled. When we're
- *	done, power down system devices.
+ *	Prevent new devices from being registered, then lock all devices
+ *	and suspend them.
  */
-
-int device_power_down(pm_message_t state)
+int device_suspend(pm_message_t state)
 {
-	int error = 0;
-	struct device * dev;
-
-	while (!list_empty(&dpm_off)) {
-		struct list_head * entry = dpm_off.prev;
-
-		dev = to_device(entry);
-		error = suspend_device_late(dev, state);
-		if (error)
-			goto Error;
-		list_move(&dev->power.entry, &dpm_off_irq);
-	}
+	int error;
 
-	error = sysdev_suspend(state);
- Done:
+	might_sleep();
+	down_write(&pm_sleep_rwsem);
+	lock_all_devices();
+	error = dpm_suspend(state);
+	if (error)
+		device_resume();
 	return error;
- Error:
-	printk(KERN_ERR "Could not power down device %s: "
-		"error %d\n", kobject_name(&dev->kobj), error);
-	dpm_power_up();
-	goto Done;
 }
-
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
Index: linux-2.6/arch/x86/kernel/msr.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/msr.c
+++ linux-2.6/arch/x86/kernel/msr.c
@@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callb
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = msr_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		msr_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
Index: linux-2.6/arch/x86/kernel/cpuid.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpuid.c
+++ linux-2.6/arch/x86/kernel/cpuid.c
@@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_cal
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		err = cpuid_device_create(cpu);
 		break;
 	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		cpuid_device_destroy(cpu);
 		break;
+	case CPU_UP_CANCELED_FROZEN:
+		destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		break;
 	}
 	return err ? NOTIFY_BAD : NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 70+ messages in thread

end of thread, other threads:[~2008-01-10 17:04 UTC | newest]

Thread overview: 70+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-05 18:36 [PATCH] PM: Acquire device locks on suspend Rafael J. Wysocki
2008-01-05 18:36 Rafael J. Wysocki
2008-01-05 20:08 ` Alan Stern
2008-01-05 20:08 ` Alan Stern
2008-01-05 20:19   ` Rafael J. Wysocki
2008-01-05 20:39     ` Alan Stern
2008-01-05 21:13       ` Rafael J. Wysocki
2008-01-05 21:13       ` Rafael J. Wysocki
2008-01-05 21:41         ` Alan Stern
2008-01-05 21:58           ` Rafael J. Wysocki
2008-01-06  4:04             ` Alan Stern
2008-01-06  4:04             ` Alan Stern
2008-01-06 13:19               ` Rafael J. Wysocki
2008-01-06 17:06                 ` Alan Stern
2008-01-06 19:05                   ` Rafael J. Wysocki
2008-01-06 19:57                     ` Rafael J. Wysocki
2008-01-06 22:19                       ` Rafael J. Wysocki
2008-01-06 22:19                       ` Rafael J. Wysocki
2008-01-06 22:21                       ` Alan Stern
2008-01-06 22:21                       ` Alan Stern
2008-01-06 22:34                         ` Rafael J. Wysocki
2008-01-06 22:39                           ` Alan Stern
2008-01-06 22:39                           ` Alan Stern
2008-01-06 22:47                             ` Rafael J. Wysocki
2008-01-07 16:16                               ` Alan Stern
2008-01-07 16:51                                 ` Rafael J. Wysocki
2008-01-07 17:23                                   ` Alan Stern
2008-01-07 18:01                                     ` Rafael J. Wysocki
2008-01-07 18:01                                     ` Rafael J. Wysocki
2008-01-07 19:29                                       ` Alan Stern
2008-01-07 20:37                                         ` Rafael J. Wysocki
2008-01-07 20:37                                         ` Rafael J. Wysocki
2008-01-07 21:32                                           ` Alan Stern
2008-01-08  0:25                                             ` Rafael J. Wysocki
2008-01-09 21:01                                               ` Alan Stern
2008-01-09 22:14                                                 ` Rafael J. Wysocki
2008-01-09 22:46                                                   ` Alan Stern
2008-01-09 22:46                                                   ` Alan Stern
2008-01-09 23:29                                                     ` Rafael J. Wysocki
2008-01-10 15:35                                                       ` Alan Stern
2008-01-10 15:35                                                       ` Alan Stern
2008-01-10 16:59                                                         ` Rafael J. Wysocki
2008-01-10 17:04                                                           ` Alan Stern
2008-01-10 17:04                                                           ` Alan Stern
2008-01-10 16:59                                                         ` Rafael J. Wysocki
2008-01-09 23:29                                                     ` Rafael J. Wysocki
2008-01-09 22:14                                                 ` Rafael J. Wysocki
2008-01-09 21:01                                               ` Alan Stern
2008-01-08  0:25                                             ` Rafael J. Wysocki
2008-01-07 21:32                                           ` Alan Stern
2008-01-07 19:29                                       ` Alan Stern
2008-01-07 17:23                                   ` Alan Stern
2008-01-07 16:51                                 ` Rafael J. Wysocki
2008-01-07 16:16                               ` Alan Stern
2008-01-06 22:47                             ` Rafael J. Wysocki
2008-01-06 22:34                         ` Rafael J. Wysocki
2008-01-06 19:57                     ` Rafael J. Wysocki
2008-01-06 22:11                     ` Alan Stern
2008-01-06 22:24                       ` Rafael J. Wysocki
2008-01-06 22:31                         ` Alan Stern
2008-01-06 22:31                         ` Alan Stern
2008-01-06 22:24                       ` Rafael J. Wysocki
2008-01-06 22:11                     ` Alan Stern
2008-01-06 19:05                   ` Rafael J. Wysocki
2008-01-06 17:06                 ` Alan Stern
2008-01-06 13:19               ` Rafael J. Wysocki
2008-01-05 21:58           ` Rafael J. Wysocki
2008-01-05 21:41         ` Alan Stern
2008-01-05 20:39     ` Alan Stern
2008-01-05 20:19   ` Rafael J. Wysocki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.