[v1,1/2] Revert "PM: runtime: Update device status before letting suppliers suspend"
diff mbox series

Message ID 4304785.LvFx2qVVIh@kreacher
State New, archived
Headers show
Series
  • PM: runtime: Update device status before letting suppliers suspend (another take)
Related show

Commit Message

Rafael J. Wysocki March 18, 2021, 6:09 p.m. UTC
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>

Revert commit 44cc89f76464 ("PM: runtime: Update device status
before letting suppliers suspend") that introduced a race condition
into __rpm_callback() which allowed a concurrent rpm_resume() to
run and resume the device prematurely after its status had been
changed to RPM_SUSPENDED by __rpm_callback().

Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
Link: https://lore.kernel.org/linux-pm/24dfb6fc-5d54-6ee2-9195-26428b7ecf8a@intel.com/
Reported by: Adrian Hunter <adrian.hunter@intel.com>
Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 62 +++++++++++++++---------------------
 1 file changed, 25 insertions(+), 37 deletions(-)

Comments

Ulf Hansson March 19, 2021, 1:29 p.m. UTC | #1
On Thu, 18 Mar 2021 at 19:15, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>
> From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
>
> Revert commit 44cc89f76464 ("PM: runtime: Update device status
> before letting suppliers suspend") that introduced a race condition
> into __rpm_callback() which allowed a concurrent rpm_resume() to
> run and resume the device prematurely after its status had been
> changed to RPM_SUSPENDED by __rpm_callback().

Huh, the code path is not entirely easy to follow. :-)

Did you find this by code inspection or did you get an error report?

>
> Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
> Link: https://lore.kernel.org/linux-pm/24dfb6fc-5d54-6ee2-9195-26428b7ecf8a@intel.com/
> Reported by: Adrian Hunter <adrian.hunter@intel.com>
> Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>

Kind regards
Uffe


> ---
>  drivers/base/power/runtime.c | 62 +++++++++++++++---------------------
>  1 file changed, 25 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 18b82427d0cb..a46a7e30881b 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
>  static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
>         __releases(&dev->power.lock) __acquires(&dev->power.lock)
>  {
> -       bool use_links = dev->power.links_count > 0;
> -       bool get = false;
>         int retval, idx;
> -       bool put;
> +       bool use_links = dev->power.links_count > 0;
>
>         if (dev->power.irq_safe) {
>                 spin_unlock(&dev->power.lock);
> -       } else if (!use_links) {
> -               spin_unlock_irq(&dev->power.lock);
>         } else {
> -               get = dev->power.runtime_status == RPM_RESUMING;
> -
>                 spin_unlock_irq(&dev->power.lock);
>
> -               /* Resume suppliers if necessary. */
> -               if (get) {
> +               /*
> +                * Resume suppliers if necessary.
> +                *
> +                * The device's runtime PM status cannot change until this
> +                * routine returns, so it is safe to read the status outside of
> +                * the lock.
> +                */
> +               if (use_links && dev->power.runtime_status == RPM_RESUMING) {
>                         idx = device_links_read_lock();
>
>                         retval = rpm_get_suppliers(dev);
> @@ -355,36 +355,24 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
>
>         if (dev->power.irq_safe) {
>                 spin_lock(&dev->power.lock);
> -               return retval;
> -       }
> -
> -       spin_lock_irq(&dev->power.lock);
> -
> -       if (!use_links)
> -               return retval;
> -
> -       /*
> -        * If the device is suspending and the callback has returned success,
> -        * drop the usage counters of the suppliers that have been reference
> -        * counted on its resume.
> -        *
> -        * Do that if the resume fails too.
> -        */
> -       put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
> -       if (put)
> -               __update_runtime_status(dev, RPM_SUSPENDED);
> -       else
> -               put = get && retval;
> -
> -       if (put) {
> -               spin_unlock_irq(&dev->power.lock);
> -
> -               idx = device_links_read_lock();
> +       } else {
> +               /*
> +                * If the device is suspending and the callback has returned
> +                * success, drop the usage counters of the suppliers that have
> +                * been reference counted on its resume.
> +                *
> +                * Do that if resume fails too.
> +                */
> +               if (use_links
> +                   && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
> +                   || (dev->power.runtime_status == RPM_RESUMING && retval))) {
> +                       idx = device_links_read_lock();
>
> -fail:
> -               rpm_put_suppliers(dev);
> + fail:
> +                       rpm_put_suppliers(dev);
>
> -               device_links_read_unlock(idx);
> +                       device_links_read_unlock(idx);
> +               }
>
>                 spin_lock_irq(&dev->power.lock);
>         }
> --
> 2.26.2
>
>
>
>
Rafael J. Wysocki March 19, 2021, 1:43 p.m. UTC | #2
On Fri, Mar 19, 2021 at 2:31 PM Ulf Hansson <ulf.hansson@linaro.org> wrote:
>
> On Thu, 18 Mar 2021 at 19:15, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> >
> > From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
> >
> > Revert commit 44cc89f76464 ("PM: runtime: Update device status
> > before letting suppliers suspend") that introduced a race condition
> > into __rpm_callback() which allowed a concurrent rpm_resume() to
> > run and resume the device prematurely after its status had been
> > changed to RPM_SUSPENDED by __rpm_callback().
>
> Huh, the code path is not entirely easy to follow. :-)
>
> Did you find this by code inspection or did you get an error report?

There was a bug report that caused me to look at the code once again.

> > Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
> > Link: https://lore.kernel.org/linux-pm/24dfb6fc-5d54-6ee2-9195-26428b7ecf8a@intel.com/
> > Reported by: Adrian Hunter <adrian.hunter@intel.com>
> > Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>

Thanks!

Patch
diff mbox series

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 18b82427d0cb..a46a7e30881b 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -325,22 +325,22 @@  static void rpm_put_suppliers(struct device *dev)
 static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
-	bool use_links = dev->power.links_count > 0;
-	bool get = false;
 	int retval, idx;
-	bool put;
+	bool use_links = dev->power.links_count > 0;
 
 	if (dev->power.irq_safe) {
 		spin_unlock(&dev->power.lock);
-	} else if (!use_links) {
-		spin_unlock_irq(&dev->power.lock);
 	} else {
-		get = dev->power.runtime_status == RPM_RESUMING;
-
 		spin_unlock_irq(&dev->power.lock);
 
-		/* Resume suppliers if necessary. */
-		if (get) {
+		/*
+		 * Resume suppliers if necessary.
+		 *
+		 * The device's runtime PM status cannot change until this
+		 * routine returns, so it is safe to read the status outside of
+		 * the lock.
+		 */
+		if (use_links && dev->power.runtime_status == RPM_RESUMING) {
 			idx = device_links_read_lock();
 
 			retval = rpm_get_suppliers(dev);
@@ -355,36 +355,24 @@  static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 
 	if (dev->power.irq_safe) {
 		spin_lock(&dev->power.lock);
-		return retval;
-	}
-
-	spin_lock_irq(&dev->power.lock);
-
-	if (!use_links)
-		return retval;
-
-	/*
-	 * If the device is suspending and the callback has returned success,
-	 * drop the usage counters of the suppliers that have been reference
-	 * counted on its resume.
-	 *
-	 * Do that if the resume fails too.
-	 */
-	put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
-	if (put)
-		__update_runtime_status(dev, RPM_SUSPENDED);
-	else
-		put = get && retval;
-
-	if (put) {
-		spin_unlock_irq(&dev->power.lock);
-
-		idx = device_links_read_lock();
+	} else {
+		/*
+		 * If the device is suspending and the callback has returned
+		 * success, drop the usage counters of the suppliers that have
+		 * been reference counted on its resume.
+		 *
+		 * Do that if resume fails too.
+		 */
+		if (use_links
+		    && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
+		    || (dev->power.runtime_status == RPM_RESUMING && retval))) {
+			idx = device_links_read_lock();
 
-fail:
-		rpm_put_suppliers(dev);
+ fail:
+			rpm_put_suppliers(dev);
 
-		device_links_read_unlock(idx);
+			device_links_read_unlock(idx);
+		}
 
 		spin_lock_irq(&dev->power.lock);
 	}