linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] driver core: shut down devices asynchronously
@ 2023-08-16 15:45 Stuart Hayes
  2023-08-16 15:54 ` Lukas Wunner
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stuart Hayes @ 2023-08-16 15:45 UTC (permalink / raw)
  To: linux-kernel, Greg Kroah-Hartman, Rafael J . Wysocki,
	Tanjore Suresh, Martin Belanger, Oliver O'Halloran,
	Daniel Wagner, Keith Busch, Lukas Wunner
  Cc: Stuart Hayes

Attempt to shut down devices asynchronously, by making a tree of devices with
associated work and completion structs, to ensure that child devices are shut
down before parents.

This can dramatically reduce system shutdown/reboot time on systems that have
devices that take many seconds to shut down, such as some NVMe drives.  On once
system tested, the shutdown time went from 11 minutes before the patch to 55
seconds with the patch.

The code could be simplified by adding the work and completion structs to
struct device, but it may make more sense to not burden it with that when there
is likely enough memory to allocate this at shutdown time, and if there isn’t,
it just falls back to the current synchronous shutdown.

Signed-off-by: Stuart Hayes <stuart.w.hayes@gmail.com>
---
 drivers/base/core.c | 216 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 189 insertions(+), 27 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3dff5037943e..fec571f56843 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4709,6 +4709,187 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid)
 }
 EXPORT_SYMBOL_GPL(device_change_owner);
 
+static void shutdown_device(struct device *dev, struct device *parent)
+{
+	/* hold lock to avoid race with probe/release */
+	if (parent)
+		device_lock(parent);
+	device_lock(dev);
+
+	/* Don't allow any more runtime suspends */
+	pm_runtime_get_noresume(dev);
+	pm_runtime_barrier(dev);
+
+	if (dev->class && dev->class->shutdown_pre) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown_pre\n");
+		dev->class->shutdown_pre(dev);
+	}
+	if (dev->bus && dev->bus->shutdown) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown\n");
+		dev->bus->shutdown(dev);
+	} else if (dev->driver && dev->driver->shutdown) {
+		if (initcall_debug)
+			dev_info(dev, "shutdown\n");
+		dev->driver->shutdown(dev);
+	}
+
+	device_unlock(dev);
+	if (parent)
+		device_unlock(parent);
+}
+
+struct shutdown_work {
+	struct list_head node;
+	struct device *dev;
+	struct work_struct work;
+	struct completion complete;
+	struct list_head children;
+};
+
+void shutdown_dev_work(struct work_struct *work)
+{
+	struct shutdown_work *sd_work = container_of(work, struct shutdown_work, work);
+	struct shutdown_work *child_sd_work;
+	struct device *dev = sd_work->dev;
+
+	/*
+	 * wait for child devices to finish shutdown
+	 */
+	list_for_each_entry(child_sd_work, &sd_work->children, node) {
+		wait_for_completion(&child_sd_work->complete);
+	}
+
+	if (dev) {
+		/*
+		 * Make sure the device is off the kset list, in the
+		 * event that dev->*->shutdown() doesn't remove it.
+		 */
+		spin_lock(&devices_kset->list_lock);
+		list_del_init(&dev->kobj.entry);
+		spin_unlock(&devices_kset->list_lock);
+
+		shutdown_device(dev, dev->parent);
+	}
+
+	complete(&sd_work->complete);
+}
+
+static void schedule_shutdown_work(struct shutdown_work *dev_shutdown_work)
+{
+	struct shutdown_work *child;
+
+	/*
+	 * schedule children to be shutdown before parents
+	 */
+	list_for_each_entry(child, &dev_shutdown_work->children, node) {
+		schedule_shutdown_work(child);
+	}
+
+	schedule_work(&dev_shutdown_work->work);
+}
+
+static void free_shutdown_tree(struct shutdown_work *tree)
+{
+	struct shutdown_work *childitem, *tmp;
+
+	if (tree) {
+		list_for_each_entry_safe(childitem, tmp, &tree->children, node) {
+			put_device(childitem->dev);
+			list_del(&childitem->node);
+			free_shutdown_tree(childitem);
+		}
+		kfree(tree);
+	}
+}
+
+static struct shutdown_work *create_shutdown_tree(struct device *dev,
+						  struct shutdown_work *parent)
+{
+	struct klist_iter i;
+	struct shutdown_work *dev_sdwork;
+	int error = 0;
+
+	/*
+	 * alloc & init shutdown_work for this device
+	 */
+	dev_sdwork = kzalloc(sizeof(*dev_sdwork), GFP_KERNEL);
+	if (!dev_sdwork)
+		goto fail;
+
+	if (dev) {
+		dev_sdwork->dev = dev;
+		get_device(dev);
+	}
+	INIT_WORK(&dev_sdwork->work, shutdown_dev_work);
+	INIT_LIST_HEAD(&dev_sdwork->children);
+	INIT_LIST_HEAD(&dev_sdwork->node);
+	init_completion(&dev_sdwork->complete);
+
+	if (parent) {
+		/*
+		 * add shutdown_work for a device's children
+		 */
+		klist_iter_init(&dev_sdwork->dev->p->klist_children, &i);
+		while (!error && (dev = next_device(&i)))
+			error = !create_shutdown_tree(dev, dev_sdwork);
+		klist_iter_exit(&i);
+
+		if (error)
+			goto fail;
+
+		list_add_tail(&dev_sdwork->node, &parent->children);
+		return dev_sdwork;
+	}
+
+	/*
+	 * add shutdown_work for top level devices
+	 */
+	spin_lock(&devices_kset->list_lock);
+	list_for_each_entry(dev, &devices_kset->list, kobj.entry) {
+		if (!dev->parent)
+			error = !create_shutdown_tree(dev, dev_sdwork);
+		if (error)
+			break;
+	}
+	spin_unlock(&devices_kset->list_lock);
+
+	if (error)
+		goto fail;
+
+	return dev_sdwork;
+
+fail:
+	free_shutdown_tree(dev_sdwork);
+	return NULL;
+}
+
+/**
+ * device_shutdown_async - schedule ->shutdown() on each device to shutdown
+ * asynchronously, ensuring each device's children are shut down before
+ * shutting down the device
+ */
+static int device_shutdown_async(void)
+{
+	struct shutdown_work *shutdown_work_tree;
+
+	/*
+	 * build tree with devices to be shut down
+	 */
+	shutdown_work_tree = create_shutdown_tree(NULL, NULL);
+	if (!shutdown_work_tree)
+		return -ENOMEM;
+
+	/*
+	 * schedule the work to run & wait for it to finish
+	 */
+	schedule_shutdown_work(shutdown_work_tree);
+	wait_for_completion(&shutdown_work_tree->complete);
+	free_shutdown_tree(shutdown_work_tree);
+	return 0;
+}
+
 /**
  * device_shutdown - call ->shutdown() on each device to shutdown.
  */
@@ -4721,6 +4902,13 @@ void device_shutdown(void)
 
 	cpufreq_suspend();
 
+	if (initcall_debug)
+		pr_info("attempting asynchronous device shutdown\n");
+	if (!device_shutdown_async())
+		return;
+	if (initcall_debug)
+		pr_info("starting synchronous device shutdown\n");
+
 	spin_lock(&devices_kset->list_lock);
 	/*
 	 * Walk the devices list backward, shutting down each in turn.
@@ -4745,33 +4933,7 @@ void device_shutdown(void)
 		list_del_init(&dev->kobj.entry);
 		spin_unlock(&devices_kset->list_lock);
 
-		/* hold lock to avoid race with probe/release */
-		if (parent)
-			device_lock(parent);
-		device_lock(dev);
-
-		/* Don't allow any more runtime suspends */
-		pm_runtime_get_noresume(dev);
-		pm_runtime_barrier(dev);
-
-		if (dev->class && dev->class->shutdown_pre) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown_pre\n");
-			dev->class->shutdown_pre(dev);
-		}
-		if (dev->bus && dev->bus->shutdown) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown\n");
-			dev->bus->shutdown(dev);
-		} else if (dev->driver && dev->driver->shutdown) {
-			if (initcall_debug)
-				dev_info(dev, "shutdown\n");
-			dev->driver->shutdown(dev);
-		}
-
-		device_unlock(dev);
-		if (parent)
-			device_unlock(parent);
+		shutdown_device(dev, parent);
 
 		put_device(dev);
 		put_device(parent);
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] driver core: shut down devices asynchronously
  2023-08-16 15:45 [PATCH] driver core: shut down devices asynchronously Stuart Hayes
@ 2023-08-16 15:54 ` Lukas Wunner
  2023-08-16 19:42   ` stuart hayes
  2023-08-16 22:07 ` kernel test robot
  2023-08-17  0:10 ` kernel test robot
  2 siblings, 1 reply; 6+ messages in thread
From: Lukas Wunner @ 2023-08-16 15:54 UTC (permalink / raw)
  To: Stuart Hayes
  Cc: linux-kernel, Greg Kroah-Hartman, Rafael J . Wysocki,
	Tanjore Suresh, Martin Belanger, Oliver O'Halloran,
	Daniel Wagner, Keith Busch

On Wed, Aug 16, 2023 at 10:45:18AM -0500, Stuart Hayes wrote:
> Attempt to shut down devices asynchronously, by making a tree of devices with
> associated work and completion structs, to ensure that child devices are shut
> down before parents.
> 
> This can dramatically reduce system shutdown/reboot time on systems that have
> devices that take many seconds to shut down, such as some NVMe drives.  On once
> system tested, the shutdown time went from 11 minutes before the patch to 55
> seconds with the patch.
> 
> The code could be simplified by adding the work and completion structs to
> struct device, but it may make more sense to not burden it with that when there
> is likely enough memory to allocate this at shutdown time, and if there isn???t,
> it just falls back to the current synchronous shutdown.

Please wrap the commit message at 72 chars.

Is there a particular reason why you're not using the infrastructure
provided by kernel/async.c and <async.h>, such as async_schedule()?
It wraps all the work_struct plumbing and also has helpers to await
completion.  I imagine using that might reduce LoC in this patch.

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] driver core: shut down devices asynchronously
  2023-08-16 15:54 ` Lukas Wunner
@ 2023-08-16 19:42   ` stuart hayes
  2023-08-16 19:52     ` Lukas Wunner
  0 siblings, 1 reply; 6+ messages in thread
From: stuart hayes @ 2023-08-16 19:42 UTC (permalink / raw)
  To: Lukas Wunner
  Cc: linux-kernel, Greg Kroah-Hartman, Rafael J . Wysocki,
	Tanjore Suresh, Martin Belanger, Oliver O'Halloran,
	Daniel Wagner, Keith Busch



On 8/16/2023 10:54 AM, Lukas Wunner wrote:
> On Wed, Aug 16, 2023 at 10:45:18AM -0500, Stuart Hayes wrote:
>> Attempt to shut down devices asynchronously, by making a tree of devices with
>> associated work and completion structs, to ensure that child devices are shut
>> down before parents.
>>
>> This can dramatically reduce system shutdown/reboot time on systems that have
>> devices that take many seconds to shut down, such as some NVMe drives.  On once
>> system tested, the shutdown time went from 11 minutes before the patch to 55
>> seconds with the patch.
>>
>> The code could be simplified by adding the work and completion structs to
>> struct device, but it may make more sense to not burden it with that when there
>> is likely enough memory to allocate this at shutdown time, and if there isn???t,
>> it just falls back to the current synchronous shutdown.
> 
> Please wrap the commit message at 72 chars.
> 

Thanks

> Is there a particular reason why you're not using the infrastructure
> provided by kernel/async.c and <async.h>, such as async_schedule()?
> It wraps all the work_struct plumbing and also has helpers to await
> completion.  I imagine using that might reduce LoC in this patch.
> 

Not a good one.  Let me look into this, thank you.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] driver core: shut down devices asynchronously
  2023-08-16 19:42   ` stuart hayes
@ 2023-08-16 19:52     ` Lukas Wunner
  0 siblings, 0 replies; 6+ messages in thread
From: Lukas Wunner @ 2023-08-16 19:52 UTC (permalink / raw)
  To: stuart hayes
  Cc: linux-kernel, Greg Kroah-Hartman, Rafael J . Wysocki,
	Tanjore Suresh, Martin Belanger, Oliver O'Halloran,
	Daniel Wagner, Keith Busch

On Wed, Aug 16, 2023 at 02:42:26PM -0500, stuart hayes wrote:
> On 8/16/2023 10:54 AM, Lukas Wunner wrote:
> > Is there a particular reason why you're not using the infrastructure
> > provided by kernel/async.c and <async.h>, such as async_schedule()?
> > It wraps all the work_struct plumbing and also has helpers to await
> > completion.  I imagine using that might reduce LoC in this patch.
> 
> Not a good one.  Let me look into this, thank you.

A word of caution on async_synchronize_cookie(), it awaits the cookies
*before* the one you're passing into the function, so depending on the
use case it may be necessary to add + 1 to the cookie:

https://lore.kernel.org/intel-gfx/20160621075704.GB1821@nuc-i3427.alporthouse.com/

It's a nasty gotcha.  Just so you're aware of it. :)

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] driver core: shut down devices asynchronously
  2023-08-16 15:45 [PATCH] driver core: shut down devices asynchronously Stuart Hayes
  2023-08-16 15:54 ` Lukas Wunner
@ 2023-08-16 22:07 ` kernel test robot
  2023-08-17  0:10 ` kernel test robot
  2 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2023-08-16 22:07 UTC (permalink / raw)
  To: Stuart Hayes, linux-kernel, Greg Kroah-Hartman,
	Rafael J . Wysocki, Tanjore Suresh, Martin Belanger,
	Oliver O'Halloran, Daniel Wagner, Keith Busch, Lukas Wunner
  Cc: oe-kbuild-all, Stuart Hayes

Hi Stuart,

kernel test robot noticed the following build warnings:

[auto build test WARNING on driver-core/driver-core-testing]
[also build test WARNING on driver-core/driver-core-next driver-core/driver-core-linus linus/master v6.5-rc6 next-20230816]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Stuart-Hayes/driver-core-shut-down-devices-asynchronously/20230816-234737
base:   driver-core/driver-core-testing
patch link:    https://lore.kernel.org/r/20230816154518.3487-1-stuart.w.hayes%40gmail.com
patch subject: [PATCH] driver core: shut down devices asynchronously
config: loongarch-allyesconfig (https://download.01.org/0day-ci/archive/20230817/202308170534.naCLTFzQ-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230817/202308170534.naCLTFzQ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308170534.naCLTFzQ-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/base/core.c:4762:6: warning: no previous prototype for 'shutdown_dev_work' [-Wmissing-prototypes]
    4762 | void shutdown_dev_work(struct work_struct *work)
         |      ^~~~~~~~~~~~~~~~~


vim +/shutdown_dev_work +4762 drivers/base/core.c

  4761	
> 4762	void shutdown_dev_work(struct work_struct *work)
  4763	{
  4764		struct shutdown_work *sd_work = container_of(work, struct shutdown_work, work);
  4765		struct shutdown_work *child_sd_work;
  4766		struct device *dev = sd_work->dev;
  4767	
  4768		/*
  4769		 * wait for child devices to finish shutdown
  4770		 */
  4771		list_for_each_entry(child_sd_work, &sd_work->children, node) {
  4772			wait_for_completion(&child_sd_work->complete);
  4773		}
  4774	
  4775		if (dev) {
  4776			/*
  4777			 * Make sure the device is off the kset list, in the
  4778			 * event that dev->*->shutdown() doesn't remove it.
  4779			 */
  4780			spin_lock(&devices_kset->list_lock);
  4781			list_del_init(&dev->kobj.entry);
  4782			spin_unlock(&devices_kset->list_lock);
  4783	
  4784			shutdown_device(dev, dev->parent);
  4785		}
  4786	
  4787		complete(&sd_work->complete);
  4788	}
  4789	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] driver core: shut down devices asynchronously
  2023-08-16 15:45 [PATCH] driver core: shut down devices asynchronously Stuart Hayes
  2023-08-16 15:54 ` Lukas Wunner
  2023-08-16 22:07 ` kernel test robot
@ 2023-08-17  0:10 ` kernel test robot
  2 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2023-08-17  0:10 UTC (permalink / raw)
  To: Stuart Hayes, linux-kernel, Greg Kroah-Hartman,
	Rafael J . Wysocki, Tanjore Suresh, Martin Belanger,
	Oliver O'Halloran, Daniel Wagner, Keith Busch, Lukas Wunner
  Cc: oe-kbuild-all, Stuart Hayes

Hi Stuart,

kernel test robot noticed the following build warnings:

[auto build test WARNING on driver-core/driver-core-testing]
[also build test WARNING on driver-core/driver-core-next driver-core/driver-core-linus linus/master v6.5-rc6 next-20230816]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Stuart-Hayes/driver-core-shut-down-devices-asynchronously/20230816-234737
base:   driver-core/driver-core-testing
patch link:    https://lore.kernel.org/r/20230816154518.3487-1-stuart.w.hayes%40gmail.com
patch subject: [PATCH] driver core: shut down devices asynchronously
config: i386-defconfig (https://download.01.org/0day-ci/archive/20230817/202308170710.K525oxZ5-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce: (https://download.01.org/0day-ci/archive/20230817/202308170710.K525oxZ5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308170710.K525oxZ5-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/base/core.c:4762:6: warning: no previous declaration for 'shutdown_dev_work' [-Wmissing-declarations]
    void shutdown_dev_work(struct work_struct *work)
         ^~~~~~~~~~~~~~~~~


vim +/shutdown_dev_work +4762 drivers/base/core.c

  4761	
> 4762	void shutdown_dev_work(struct work_struct *work)
  4763	{
  4764		struct shutdown_work *sd_work = container_of(work, struct shutdown_work, work);
  4765		struct shutdown_work *child_sd_work;
  4766		struct device *dev = sd_work->dev;
  4767	
  4768		/*
  4769		 * wait for child devices to finish shutdown
  4770		 */
  4771		list_for_each_entry(child_sd_work, &sd_work->children, node) {
  4772			wait_for_completion(&child_sd_work->complete);
  4773		}
  4774	
  4775		if (dev) {
  4776			/*
  4777			 * Make sure the device is off the kset list, in the
  4778			 * event that dev->*->shutdown() doesn't remove it.
  4779			 */
  4780			spin_lock(&devices_kset->list_lock);
  4781			list_del_init(&dev->kobj.entry);
  4782			spin_unlock(&devices_kset->list_lock);
  4783	
  4784			shutdown_device(dev, dev->parent);
  4785		}
  4786	
  4787		complete(&sd_work->complete);
  4788	}
  4789	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-08-17  0:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-16 15:45 [PATCH] driver core: shut down devices asynchronously Stuart Hayes
2023-08-16 15:54 ` Lukas Wunner
2023-08-16 19:42   ` stuart hayes
2023-08-16 19:52     ` Lukas Wunner
2023-08-16 22:07 ` kernel test robot
2023-08-17  0:10 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).