linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
@ 2020-06-14  6:46 Navid Emamdoost
  2020-06-14  9:48 ` Andy Shevchenko
  0 siblings, 1 reply; 6+ messages in thread
From: Navid Emamdoost @ 2020-06-14  6:46 UTC (permalink / raw)
  To: Lucas Stach, Russell King, Christian Gmeiner, David Airlie,
	Daniel Vetter, etnaviv, dri-devel, linux-kernel
  Cc: emamd001, wu000273, kjlu, smccaman, Navid Emamdoost

In etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
and etnaviv_gpu_init, the call to pm_runtime_get_sync increments the
counter even in case of failure, leading to an incorrect ref count.
In case of failure, decrement the ref count before returning.

Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index a31eeff2b297..16f5bc65771a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0) {
 		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
-		return ret;
+		goto pm_put;
 	}
 
 	etnaviv_hw_identify(gpu);
@@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	return ret;
@@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 
 	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0)
-		return ret;
+		goto pm_put;
 
 	dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
 	dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
@@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 	ret = 0;
 
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	return ret;
@@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 	dev_err(gpu->dev, "recover hung GPU!\n");
 
 	if (pm_runtime_get_sync(gpu->dev) < 0)
-		return;
+		goto pm_put;
 
 	mutex_lock(&gpu->lock);
 
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 }
 
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 
 	if (!submit->runtime_resumed) {
 		ret = pm_runtime_get_sync(gpu->dev);
-		if (ret < 0)
+		if (ret < 0) {
+			pm_runtime_put(gpu->dev);
 			return NULL;
+		}
 		submit->runtime_resumed = true;
 	}
 
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 	ret = event_alloc(gpu, nr_events, event);
 	if (ret) {
 		DRM_ERROR("no free events\n");
+		pm_runtime_put(gpu->dev);
 		return NULL;
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
  2020-06-14  6:46 [PATCH] drm/etnaviv: fix ref count leak via pm_runtime_get_sync Navid Emamdoost
@ 2020-06-14  9:48 ` Andy Shevchenko
  2020-06-15  6:12   ` [PATCH v2] " Navid Emamdoost
  2020-06-15  6:13   ` [PATCH] " Navid Emamdoost
  0 siblings, 2 replies; 6+ messages in thread
From: Andy Shevchenko @ 2020-06-14  9:48 UTC (permalink / raw)
  To: Navid Emamdoost
  Cc: Lucas Stach, Russell King, Christian Gmeiner, David Airlie,
	Daniel Vetter, etnaviv, dri-devel, Linux Kernel Mailing List,
	Navid Emamdoost, wu000273, Kangjie Lu, Stephen McCamant

On Sun, Jun 14, 2020 at 9:48 AM Navid Emamdoost
<navid.emamdoost@gmail.com> wrote:

...

> +               if (ret < 0) {

> +                       pm_runtime_put(gpu->dev);

Please change this to _put_noidle() in all of your patches. We could
not bear a flag day of fixing these parts again.
Yes, I know that the behaviour is the same *now*, but calling put here
is slightly inconsistent.

...

> +               pm_runtime_put(gpu->dev);

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
  2020-06-14  9:48 ` Andy Shevchenko
@ 2020-06-15  6:12   ` Navid Emamdoost
  2020-06-17  9:53     ` Lucas Stach
  2020-06-15  6:13   ` [PATCH] " Navid Emamdoost
  1 sibling, 1 reply; 6+ messages in thread
From: Navid Emamdoost @ 2020-06-15  6:12 UTC (permalink / raw)
  To: Lucas Stach, Russell King, Christian Gmeiner, David Airlie,
	Daniel Vetter, etnaviv, dri-devel, linux-kernel
  Cc: emamd001, wu000273, kjlu, mccamant, andy.shevchenko, Navid Emamdoost

In etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
and etnaviv_gpu_init, the call to pm_runtime_get_sync increments the
counter even in case of failure, leading to an incorrect ref count.
In case of failure, decrement the ref count before returning.

Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
---
Changes in v2:
	- replace pm_runtime_put with pm_runtime_put_noidle
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index a31eeff2b297..7c9f3f9ba123 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0) {
 		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
-		return ret;
+		goto pm_put;
 	}
 
 	etnaviv_hw_identify(gpu);
@@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	return ret;
@@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 
 	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0)
-		return ret;
+		goto pm_put;
 
 	dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
 	dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
@@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 	ret = 0;
 
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	return ret;
@@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 	dev_err(gpu->dev, "recover hung GPU!\n");
 
 	if (pm_runtime_get_sync(gpu->dev) < 0)
-		return;
+		goto pm_put;
 
 	mutex_lock(&gpu->lock);
 
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
 	pm_runtime_put_autosuspend(gpu->dev);
 }
 
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 
 	if (!submit->runtime_resumed) {
 		ret = pm_runtime_get_sync(gpu->dev);
-		if (ret < 0)
+		if (ret < 0) {
+			pm_runtime_put_noidle(gpu->dev);
 			return NULL;
+		}
 		submit->runtime_resumed = true;
 	}
 
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 	ret = event_alloc(gpu, nr_events, event);
 	if (ret) {
 		DRM_ERROR("no free events\n");
+		pm_runtime_put_noidle(gpu->dev);
 		return NULL;
 	}
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
  2020-06-14  9:48 ` Andy Shevchenko
  2020-06-15  6:12   ` [PATCH v2] " Navid Emamdoost
@ 2020-06-15  6:13   ` Navid Emamdoost
  1 sibling, 0 replies; 6+ messages in thread
From: Navid Emamdoost @ 2020-06-15  6:13 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Lucas Stach, Russell King, Christian Gmeiner, David Airlie,
	Daniel Vetter, etnaviv, dri-devel, Linux Kernel Mailing List,
	Navid Emamdoost, Qiushi Wu, Kangjie Lu, Stephen McCamant

On Sun, Jun 14, 2020 at 4:48 AM Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
>
> On Sun, Jun 14, 2020 at 9:48 AM Navid Emamdoost
> <navid.emamdoost@gmail.com> wrote:
>
> ...
>
> > +               if (ret < 0) {
>
> > +                       pm_runtime_put(gpu->dev);
>
> Please, in all your patches fix this to be _put_noidle(). We wouldn't
> bear the flag day of fixing these parts again.
> Yes, I know that *now* behaviour is the same, but calling put here is
> slightly inconsistent.

v2 is sent.

>
> ...
>
> > +               pm_runtime_put(gpu->dev);
>
> --
> With Best Regards,
> Andy Shevchenko



-- 
Navid.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
  2020-06-15  6:12   ` [PATCH v2] " Navid Emamdoost
@ 2020-06-17  9:53     ` Lucas Stach
  2020-06-17 17:57       ` Navid Emamdoost
  0 siblings, 1 reply; 6+ messages in thread
From: Lucas Stach @ 2020-06-17  9:53 UTC (permalink / raw)
  To: Navid Emamdoost, Russell King, Christian Gmeiner, David Airlie,
	Daniel Vetter, etnaviv, dri-devel, linux-kernel
  Cc: kjlu, mccamant, andy.shevchenko, emamd001, wu000273

Hi Navid,

Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
> in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
> and etnaviv_gpu_init the call to pm_runtime_get_sync increments the
> counter even in case of failure, leading to incorrect ref count.
> In case of failure, decrement the ref count before returning.

While that change is correct with the current API, may I ask the
question why the way this API works is considered reasonable? An API
call that fails, but still changes internal state and expects the
caller to clean up the mess, is not really what I would consider fool-
proof API design. Is there a specific reason why it is done this way
and not handled internally?

Regards,
Lucas

> Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
> ---
> Changes in v2:
> 	- replace pm_runtime_put with  pm_runtime_put_noidle
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++----
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index a31eeff2b297..7c9f3f9ba123 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  	ret = pm_runtime_get_sync(gpu->dev);
>  	if (ret < 0) {
>  		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
> -		return ret;
> +		goto pm_put;
>  	}
>  
>  	etnaviv_hw_identify(gpu);
> @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  
>  fail:
>  	pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>  	pm_runtime_put_autosuspend(gpu->dev);
>  
>  	return ret;
> @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
>  
>  	ret = pm_runtime_get_sync(gpu->dev);
>  	if (ret < 0)
> -		return ret;
> +		goto pm_put;
>  
>  	dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
>  	dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
> @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
>  	ret = 0;
>  
>  	pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>  	pm_runtime_put_autosuspend(gpu->dev);
>  
>  	return ret;
> @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>  	dev_err(gpu->dev, "recover hung GPU!\n");
>  
>  	if (pm_runtime_get_sync(gpu->dev) < 0)
> -		return;
> +		goto pm_put;
>  
>  	mutex_lock(&gpu->lock);
>  
> @@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>  
>  	mutex_unlock(&gpu->lock);
>  	pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>  	pm_runtime_put_autosuspend(gpu->dev);
>  }
>  
> @@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
>  
>  	if (!submit->runtime_resumed) {
>  		ret = pm_runtime_get_sync(gpu->dev);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			pm_runtime_put_noidle(gpu->dev);
>  			return NULL;
> +		}
>  		submit->runtime_resumed = true;
>  	}
>  
> @@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
>  	ret = event_alloc(gpu, nr_events, event);
>  	if (ret) {
>  		DRM_ERROR("no free events\n");
> +		pm_runtime_put_noidle(gpu->dev);
>  		return NULL;
>  	}
>  


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync
  2020-06-17  9:53     ` Lucas Stach
@ 2020-06-17 17:57       ` Navid Emamdoost
  0 siblings, 0 replies; 6+ messages in thread
From: Navid Emamdoost @ 2020-06-17 17:57 UTC (permalink / raw)
  To: Lucas Stach
  Cc: Russell King, Christian Gmeiner, David Airlie, Daniel Vetter,
	etnaviv, dri-devel, LKML, Kangjie Lu, mccamant, Andy Shevchenko,
	Navid Emamdoost, Qiushi Wu

Hi Lucas,


On Wed, Jun 17, 2020 at 4:53 AM Lucas Stach <l.stach@pengutronix.de> wrote:
>
> Hi Navid,
>
> Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
> > in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
> > and etnaviv_gpu_init the call to pm_runtime_get_sync increments the
> > counter even in case of failure, leading to incorrect ref count.
> > In case of failure, decrement the ref count before returning.
>
> While that change is correct with the current API, may I ask the
> question why the way this API works is considered reasonable? An API
> call that fails, but still changes internal state and expects the
> caller to clean up the mess, is not really what I would consider fool-
> proof API design. Is there a specific reason why it is done this way
> and not handled internally?

I share the same concern with you on the way this API is working now.
To the best of my knowledge, there are ongoing discussions on this
issue:

https://lkml.org/lkml/2020/6/14/76
https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao.liu@zju.edu.cn/

>
> Regards,
> Lucas
>
> > Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
> > ---
> > Changes in v2:
> >       - replace pm_runtime_put with  pm_runtime_put_noidle
> > ---
> >  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++++++++++----
> >  1 file changed, 10 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > index a31eeff2b297..7c9f3f9ba123 100644
> > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> >       ret = pm_runtime_get_sync(gpu->dev);
> >       if (ret < 0) {
> >               dev_err(gpu->dev, "Failed to enable GPU power domain\n");
> > -             return ret;
> > +             goto pm_put;
> >       }
> >
> >       etnaviv_hw_identify(gpu);
> > @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> >
> >  fail:
> >       pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >       pm_runtime_put_autosuspend(gpu->dev);
> >
> >       return ret;
> > @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
> >
> >       ret = pm_runtime_get_sync(gpu->dev);
> >       if (ret < 0)
> > -             return ret;
> > +             goto pm_put;
> >
> >       dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
> >       dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
> > @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
> >       ret = 0;
> >
> >       pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >       pm_runtime_put_autosuspend(gpu->dev);
> >
> >       return ret;
> > @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
> >       dev_err(gpu->dev, "recover hung GPU!\n");
> >
> >       if (pm_runtime_get_sync(gpu->dev) < 0)
> > -             return;
> > +             goto pm_put;
> >
> >       mutex_lock(&gpu->lock);
> >
> > @@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
> >
> >       mutex_unlock(&gpu->lock);
> >       pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >       pm_runtime_put_autosuspend(gpu->dev);
> >  }
> >
> > @@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
> >
> >       if (!submit->runtime_resumed) {
> >               ret = pm_runtime_get_sync(gpu->dev);
> > -             if (ret < 0)
> > +             if (ret < 0) {
> > +                     pm_runtime_put_noidle(gpu->dev);
> >                       return NULL;
> > +             }
> >               submit->runtime_resumed = true;
> >       }
> >
> > @@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
> >       ret = event_alloc(gpu, nr_events, event);
> >       if (ret) {
> >               DRM_ERROR("no free events\n");
> > +             pm_runtime_put_noidle(gpu->dev);
> >               return NULL;
> >       }
> >
>


--
Navid.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-06-17 17:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-14  6:46 [PATCH] drm/etnaviv: fix ref count leak via pm_runtime_get_sync Navid Emamdoost
2020-06-14  9:48 ` Andy Shevchenko
2020-06-15  6:12   ` [PATCH v2] " Navid Emamdoost
2020-06-17  9:53     ` Lucas Stach
2020-06-17 17:57       ` Navid Emamdoost
2020-06-15  6:13   ` [PATCH] " Navid Emamdoost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).