All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/amdgpu: Add mutex to protect IB resources
@ 2019-11-06  6:56 ` Jesse Zhang
  0 siblings, 0 replies; 4+ messages in thread
From: Jesse Zhang @ 2019-11-06  6:56 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Jesse Zhang

Unloading driver has call trace when unloading happens
2s after loading driver.

Since ring test are delayed after initializing driver,
it is possible that driver has been unloaded before or
while doing ring test.

Add mutex to prevent ring test and driver finalization
occurs at the same time and check before doing ring test
if required resources still exist.

Change-Id: I27b52c2c630ad3853c6384e7b6906f0fae590ead
Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 9 +++++++++
 3 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc5..b825ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -923,6 +923,7 @@ struct amdgpu_device {
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
 	struct amdgpu_sa_manager	ring_tmp_bo;
+	struct mutex			ib_lock;
 
 	/* interrupts */
 	struct amdgpu_irq		irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b2f38b0..8adbb25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2806,6 +2806,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->lock_reset);
 	mutex_init(&adev->virt.dpm_mutex);
 	mutex_init(&adev->psp.mutex);
+	mutex_init(&adev->ib_lock);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index d121bbd..ef5339e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -317,10 +317,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
  */
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
+	mutex_lock(&adev->ib_lock);
 	if (adev->ib_pool_ready) {
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 	}
+	mutex_unlock(&adev->ib_lock);
 }
 
 /**
@@ -364,6 +366,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = adev->rings[i];
 		long tmo;
 
+		if (adev->shutdown) {
+			DRM_INFO("Device finalized, skip unfinished ring test\n");
+			return 0;
+		}
+
 		/* KIQ rings don't have an IB test because we never submit IBs
 		 * to them and they have no interrupt support.
 		 */
@@ -381,7 +388,9 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		else
 			tmo = tmo_gfx;
 
+		mutex_lock(&adev->ib_lock);
 		r = amdgpu_ring_test_ib(ring, tmo);
+		mutex_unlock(&adev->ib_lock);
 		if (!r) {
 			DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
 				      ring->name);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] drm/amd/amdgpu: Add mutex to protect IB resources
@ 2019-11-06  6:56 ` Jesse Zhang
  0 siblings, 0 replies; 4+ messages in thread
From: Jesse Zhang @ 2019-11-06  6:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: Jesse Zhang

Unloading driver has call trace when unloading happens
2s after loading driver.

Since ring test are delayed after initializing driver,
it is possible that driver has been unloaded before or
while doing ring test.

Add mutex to prevent ring test and driver finalization
occurs at the same time and check before doing ring test
if required resources still exist.

Change-Id: I27b52c2c630ad3853c6384e7b6906f0fae590ead
Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 9 +++++++++
 3 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0469cc5..b825ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -923,6 +923,7 @@ struct amdgpu_device {
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
 	struct amdgpu_sa_manager	ring_tmp_bo;
+	struct mutex			ib_lock;
 
 	/* interrupts */
 	struct amdgpu_irq		irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b2f38b0..8adbb25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2806,6 +2806,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->lock_reset);
 	mutex_init(&adev->virt.dpm_mutex);
 	mutex_init(&adev->psp.mutex);
+	mutex_init(&adev->ib_lock);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index d121bbd..ef5339e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -317,10 +317,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
  */
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
+	mutex_lock(&adev->ib_lock);
 	if (adev->ib_pool_ready) {
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 	}
+	mutex_unlock(&adev->ib_lock);
 }
 
 /**
@@ -364,6 +366,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = adev->rings[i];
 		long tmo;
 
+		if (adev->shutdown) {
+			DRM_INFO("Device finalized, skip unfinished ring test\n");
+			return 0;
+		}
+
 		/* KIQ rings don't have an IB test because we never submit IBs
 		 * to them and they have no interrupt support.
 		 */
@@ -381,7 +388,9 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		else
 			tmo = tmo_gfx;
 
+		mutex_lock(&adev->ib_lock);
 		r = amdgpu_ring_test_ib(ring, tmo);
+		mutex_unlock(&adev->ib_lock);
 		if (!r) {
 			DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
 				      ring->name);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add mutex to protect IB resources
@ 2019-11-06  9:35     ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2019-11-06  9:35 UTC (permalink / raw)
  To: Jesse Zhang, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 06.11.19 um 07:56 schrieb Jesse Zhang:
> Unloading driver has call trace when unloading happens
> 2s after loading driver.
>
> Since ring test are delayed after initializing driver,
> it is possible that driver has been unloaded before or
> while doing ring test.
>
> Add mutex to prevent ring test and driver finalization
> occurs at the same time and check before doing ring test
> if required resources still exist.

NAK, a mutex is the wrong approach.

You just need to add an flush_delayed_work(&adev->delayed_init_work) to 
amdgpu_device_fini().

Regards,
Christian.

>
> Change-Id: I27b52c2c630ad3853c6384e7b6906f0fae590ead
> Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 9 +++++++++
>   3 files changed, 11 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0469cc5..b825ad0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -923,6 +923,7 @@ struct amdgpu_device {
>   	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
>   	bool				ib_pool_ready;
>   	struct amdgpu_sa_manager	ring_tmp_bo;
> +	struct mutex			ib_lock;
>   
>   	/* interrupts */
>   	struct amdgpu_irq		irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b2f38b0..8adbb25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2806,6 +2806,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	mutex_init(&adev->lock_reset);
>   	mutex_init(&adev->virt.dpm_mutex);
>   	mutex_init(&adev->psp.mutex);
> +	mutex_init(&adev->ib_lock);
>   
>   	r = amdgpu_device_check_arguments(adev);
>   	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d121bbd..ef5339e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -317,10 +317,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>    */
>   void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
>   {
> +	mutex_lock(&adev->ib_lock);
>   	if (adev->ib_pool_ready) {
>   		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
>   		adev->ib_pool_ready = false;
>   	}
> +	mutex_unlock(&adev->ib_lock);
>   }
>   
>   /**
> @@ -364,6 +366,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   		struct amdgpu_ring *ring = adev->rings[i];
>   		long tmo;
>   
> +		if (adev->shutdown) {
> +			DRM_INFO("Device finalized, skip unfinished ring test\n");
> +			return 0;
> +		}
> +
>   		/* KIQ rings don't have an IB test because we never submit IBs
>   		 * to them and they have no interrupt support.
>   		 */
> @@ -381,7 +388,9 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   		else
>   			tmo = tmo_gfx;
>   
> +		mutex_lock(&adev->ib_lock);
>   		r = amdgpu_ring_test_ib(ring, tmo);
> +		mutex_unlock(&adev->ib_lock);
>   		if (!r) {
>   			DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
>   				      ring->name);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add mutex to protect IB resources
@ 2019-11-06  9:35     ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2019-11-06  9:35 UTC (permalink / raw)
  To: Jesse Zhang, amd-gfx

Am 06.11.19 um 07:56 schrieb Jesse Zhang:
> Unloading driver has call trace when unloading happens
> 2s after loading driver.
>
> Since ring test are delayed after initializing driver,
> it is possible that driver has been unloaded before or
> while doing ring test.
>
> Add mutex to prevent ring test and driver finalization
> occurs at the same time and check before doing ring test
> if required resources still exist.

NAK, a mutex is the wrong approach.

You just need to add an flush_delayed_work(&adev->delayed_init_work) to 
amdgpu_device_fini().

Regards,
Christian.

>
> Change-Id: I27b52c2c630ad3853c6384e7b6906f0fae590ead
> Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 9 +++++++++
>   3 files changed, 11 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0469cc5..b825ad0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -923,6 +923,7 @@ struct amdgpu_device {
>   	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
>   	bool				ib_pool_ready;
>   	struct amdgpu_sa_manager	ring_tmp_bo;
> +	struct mutex			ib_lock;
>   
>   	/* interrupts */
>   	struct amdgpu_irq		irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b2f38b0..8adbb25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2806,6 +2806,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	mutex_init(&adev->lock_reset);
>   	mutex_init(&adev->virt.dpm_mutex);
>   	mutex_init(&adev->psp.mutex);
> +	mutex_init(&adev->ib_lock);
>   
>   	r = amdgpu_device_check_arguments(adev);
>   	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d121bbd..ef5339e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -317,10 +317,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>    */
>   void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
>   {
> +	mutex_lock(&adev->ib_lock);
>   	if (adev->ib_pool_ready) {
>   		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
>   		adev->ib_pool_ready = false;
>   	}
> +	mutex_unlock(&adev->ib_lock);
>   }
>   
>   /**
> @@ -364,6 +366,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   		struct amdgpu_ring *ring = adev->rings[i];
>   		long tmo;
>   
> +		if (adev->shutdown) {
> +			DRM_INFO("Device finalized, skip unfinished ring test\n");
> +			return 0;
> +		}
> +
>   		/* KIQ rings don't have an IB test because we never submit IBs
>   		 * to them and they have no interrupt support.
>   		 */
> @@ -381,7 +388,9 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   		else
>   			tmo = tmo_gfx;
>   
> +		mutex_lock(&adev->ib_lock);
>   		r = amdgpu_ring_test_ib(ring, tmo);
> +		mutex_unlock(&adev->ib_lock);
>   		if (!r) {
>   			DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
>   				      ring->name);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-11-06  9:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-06  6:56 [PATCH] drm/amd/amdgpu: Add mutex to protect IB resources Jesse Zhang
2019-11-06  6:56 ` Jesse Zhang
     [not found] ` <1573023416-10462-1-git-send-email-zhexi.zhang-5C7GfCeVMHo@public.gmane.org>
2019-11-06  9:35   ` Christian König
2019-11-06  9:35     ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.