All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
@ 2021-11-04  8:19 Evan Quan
  2021-11-04  8:24 ` Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Evan Quan @ 2021-11-04  8:19 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

It's confirmed that on some APUs the interaction with SMU about DPM disablement
will power off the UVD completely. Thus the succeeding interactions with UVD
during the reboot will trigger hard hang. To workaround this issue, we will skip
the dpm disablement on APUs.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
---
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
 4 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c108b8381795..67ec13622e51 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
 	 */
 	cancel_delayed_work_sync(&adev->uvd.idle_work);
 
-	if (adev->pm.dpm_enabled) {
-		amdgpu_dpm_enable_uvd(adev, false);
-	} else {
-		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-		/* shutdown the UVD block */
-		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-						       AMD_PG_STATE_GATE);
-		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-						       AMD_CG_STATE_GATE);
+	if (!(adev->flags & AMD_IS_APU)) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_uvd(adev, false);
+		} else {
+			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+			/* shutdown the UVD block */
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_CG_STATE_GATE);
+		}
 	}
 
 	r = uvd_v4_2_hw_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 2d558c2f417d..60d05ec8c953 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
 	 */
 	cancel_delayed_work_sync(&adev->uvd.idle_work);
 
-	if (adev->pm.dpm_enabled) {
-		amdgpu_dpm_enable_uvd(adev, false);
-	} else {
-		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-		/* shutdown the UVD block */
-		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-						       AMD_PG_STATE_GATE);
-		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-						       AMD_CG_STATE_GATE);
+	/*
+	 * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
+	 * will power off the UVD. That will make the succeeding interactions with UVD on the
+	 * suspend path impossible. And the system will hang due to that. To workaround the
+	 * issue, we will skip the dpm disablement on APUs.
+	 *
+	 * TODO: a better solution is to reorg the action chains performed on suspend and make
+	 * the dpm disablement the last one. But that will involve a lot and needs MM team's
+	 * help.
+	 */
+	if (!(adev->flags & AMD_IS_APU)) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_uvd(adev, false);
+		} else {
+			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+			/* shutdown the UVD block */
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							       AMD_CG_STATE_GATE);
+		}
 	}
 
 	r = uvd_v6_0_hw_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 67eb01fef789..8aa9d8c07053 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
 	 */
 	cancel_delayed_work_sync(&adev->vce.idle_work);
 
-	if (adev->pm.dpm_enabled) {
-		amdgpu_dpm_enable_vce(adev, false);
-	} else {
-		amdgpu_asic_set_vce_clocks(adev, 0, 0);
-		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						       AMD_PG_STATE_GATE);
-		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						       AMD_CG_STATE_GATE);
+	if (!(adev->flags & AMD_IS_APU)) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_vce(adev, false);
+		} else {
+			amdgpu_asic_set_vce_clocks(adev, 0, 0);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_GATE);
+		}
 	}
 
 	r = vce_v2_0_hw_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 142e291983b4..b177cd442838 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
 	 */
 	cancel_delayed_work_sync(&adev->vce.idle_work);
 
-	if (adev->pm.dpm_enabled) {
-		amdgpu_dpm_enable_vce(adev, false);
-	} else {
-		amdgpu_asic_set_vce_clocks(adev, 0, 0);
-		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						       AMD_PG_STATE_GATE);
-		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						       AMD_CG_STATE_GATE);
+	if (!(adev->flags & AMD_IS_APU)) {
+		if (adev->pm.dpm_enabled) {
+			amdgpu_dpm_enable_vce(adev, false);
+		} else {
+			amdgpu_asic_set_vce_clocks(adev, 0, 0);
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_PG_STATE_GATE);
+			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							       AMD_CG_STATE_GATE);
+		}
 	}
 
 	r = vce_v3_0_hw_fini(adev);
-- 
2.29.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-04  8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
@ 2021-11-04  8:24 ` Christian König
  2021-11-04  8:54 ` Lazar, Lijo
  2021-11-05  2:03 ` James Zhu
  2 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2021-11-04  8:24 UTC (permalink / raw)
  To: Evan Quan, amd-gfx, Liu, Leo; +Cc: Alexander.Deucher, Lijo.Lazar

Am 04.11.21 um 09:19 schrieb Evan Quan:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13

Acked-by: Christian König <christian.koenig@amd.com>

But Leo should take a look as well, adding him on CC.

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
>   4 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);
> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);
> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	/*
> +	 * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> +	 * will power off the UVD. That will make the succeeding interactions with UVD on the
> +	 * suspend path impossible. And the system will hang due to that. To workaround the
> +	 * issue, we will skip the dpm disablement on APUs.
> +	 *
> +	 * TODO: a better solution is to reorg the action chains performed on suspend and make
> +	 * the dpm disablement the last one. But that will involve a lot and needs MM team's
> +	 * help.
> +	 */
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v3_0_hw_fini(adev);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-04  8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
  2021-11-04  8:24 ` Christian König
@ 2021-11-04  8:54 ` Lazar, Lijo
  2021-11-05  7:58   ` Quan, Evan
  2021-11-05  2:03 ` James Zhu
  2 siblings, 1 reply; 7+ messages in thread
From: Lazar, Lijo @ 2021-11-04  8:54 UTC (permalink / raw)
  To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher



On 11/4/2021 1:49 PM, Evan Quan wrote:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
> 
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> ---
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
>   4 files changed, 52 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);

If the idle work handler had already started execution, it also goes 
through the same logic. Wouldn't that be a problem? The other case is if 
decode work is already completed before suspend is called, then also it 
disables DPM. Not sure how it works then, or is this caused by a second 
atempt to power off again after idle work is executed?

Thanks,
Lijo

>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);
> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);
> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	/*
> +	 * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> +	 * will power off the UVD. That will make the succeeding interactions with UVD on the
> +	 * suspend path impossible. And the system will hang due to that. To workaround the
> +	 * issue, we will skip the dpm disablement on APUs.
> +	 *
> +	 * TODO: a better solution is to reorg the action chains performed on suspend and make
> +	 * the dpm disablement the last one. But that will involve a lot and needs MM team's
> +	 * help.
> +	 */
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v3_0_hw_fini(adev);
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-04  8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
  2021-11-04  8:24 ` Christian König
  2021-11-04  8:54 ` Lazar, Lijo
@ 2021-11-05  2:03 ` James Zhu
  2021-11-05  8:14   ` Quan, Evan
  2 siblings, 1 reply; 7+ messages in thread
From: James Zhu @ 2021-11-05  2:03 UTC (permalink / raw)
  To: amd-gfx


On 2021-11-04 4:19 a.m., Evan Quan wrote:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> ---
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
>   4 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);

[JZ] Hi Evan, VCN code put amdgpu_dpm_enable_uvd(false) at the end of 
stop.  Can we do the same for uvd/vce?

Here, it is possible that some dec/enc jobs are still running when dpm 
is called. I am not sure if this situation caused hard hang during reboot.

> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->uvd.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_uvd(adev, false);
> -	} else {
> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> -		/* shutdown the UVD block */
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> -						       AMD_CG_STATE_GATE);
> +	/*
> +	 * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> +	 * will power off the UVD. That will make the succeeding interactions with UVD on the
> +	 * suspend path impossible. And the system will hang due to that. To workaround the
> +	 * issue, we will skip the dpm disablement on APUs.
> +	 *
> +	 * TODO: a better solution is to reorg the action chains performed on suspend and make
> +	 * the dpm disablement the last one. But that will involve a lot and needs MM team's
> +	 * help.
> +	 */
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_uvd(adev, false);
> +		} else {
> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> +			/* shutdown the UVD block */
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
>   	 */
>   	cancel_delayed_work_sync(&adev->vce.idle_work);
>   
> -	if (adev->pm.dpm_enabled) {
> -		amdgpu_dpm_enable_vce(adev, false);
> -	} else {
> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> -		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_PG_STATE_GATE);
> -		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> -						       AMD_CG_STATE_GATE);
> +	if (!(adev->flags & AMD_IS_APU)) {
> +		if (adev->pm.dpm_enabled) {
> +			amdgpu_dpm_enable_vce(adev, false);
> +		} else {
> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> +			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_PG_STATE_GATE);
> +			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> +							       AMD_CG_STATE_GATE);
> +		}
>   	}
>   
>   	r = vce_v3_0_hw_fini(adev);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-04  8:54 ` Lazar, Lijo
@ 2021-11-05  7:58   ` Quan, Evan
  2021-11-08 10:26     ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Quan, Evan @ 2021-11-05  7:58 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]



> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Thursday, November 4, 2021 4:55 PM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
> reboot
> 
> 
> 
> On 11/4/2021 1:49 PM, Evan Quan wrote:
> > It's confirmed that on some APUs the interaction with SMU about DPM
> > disablement will power off the UVD completely. Thus the succeeding
> > interactions with UVD during the reboot will trigger hard hang. To
> > workaround this issue, we will skip the dpm disablement on APUs.
> >
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> > ---
> >   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
> +++++++++++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> >   4 files changed, 52 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > index c108b8381795..67ec13622e51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> 
> If the idle work handler had already started execution, it also goes through
> the same logic. Wouldn't that be a problem? The other case is if decode work
> is already completed before suspend is called, then also it disables DPM. Not
> sure how it works then, or is this caused by a second atempt to power off
> again after idle work is executed?
[Quan, Evan] Good point. Yes, maybe there should not be 2nd attempt when the target IP is already in the desired state.
Let me confirm with Boris.

BR
Evan
> 
> Thanks,
> Lijo
> 
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v4_2_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > index 2d558c2f417d..60d05ec8c953 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	/*
> > +	 * It's confirmed that on some APUs the interaction with SMU(about
> DPM disablement)
> > +	 * will power off the UVD. That will make the succeeding interactions
> with UVD on the
> > +	 * suspend path impossible. And the system will hang due to that. To
> workaround the
> > +	 * issue, we will skip the dpm disablement on APUs.
> > +	 *
> > +	 * TODO: a better solution is to reorg the action chains performed on
> suspend and make
> > +	 * the dpm disablement the last one. But that will involve a lot and
> needs MM team's
> > +	 * help.
> > +	 */
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v6_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > index 67eb01fef789..8aa9d8c07053 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v2_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > index 142e291983b4..b177cd442838 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v3_0_hw_fini(adev);
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-05  2:03 ` James Zhu
@ 2021-11-05  8:14   ` Quan, Evan
  0 siblings, 0 replies; 7+ messages in thread
From: Quan, Evan @ 2021-11-05  8:14 UTC (permalink / raw)
  To: Zhu, James, amd-gfx

[AMD Official Use Only]



> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> James Zhu
> Sent: Friday, November 5, 2021 10:03 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
> reboot
> 
> 
> On 2021-11-04 4:19 a.m., Evan Quan wrote:
> > It's confirmed that on some APUs the interaction with SMU about DPM
> > disablement will power off the UVD completely. Thus the succeeding
> > interactions with UVD during the reboot will trigger hard hang. To
> > workaround this issue, we will skip the dpm disablement on APUs.
> >
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> > ---
> >   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
> +++++++++++++++++++--------
> >   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> >   4 files changed, 52 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > index c108b8381795..67ec13622e51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> 
> [JZ] Hi Evan, VCN code put amdgpu_dpm_enable_uvd(false) at the end of
> stop.  Can we do the same for uvd/vce?
[Quan, Evan] Sounds reasonable to me. Actually Lijo provided some insights which enlightened me.
I will drop this patch and provide a new one.

BR
Evan
> 
> Here, it is possible that some dec/enc jobs are still running when dpm is
> called. I am not sure if this situation caused hard hang during reboot.
> 
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v4_2_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > index 2d558c2f417d..60d05ec8c953 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_uvd(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > -		/* shutdown the UVD block */
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > -						       AMD_CG_STATE_GATE);
> > +	/*
> > +	 * It's confirmed that on some APUs the interaction with SMU(about
> DPM disablement)
> > +	 * will power off the UVD. That will make the succeeding interactions
> with UVD on the
> > +	 * suspend path impossible. And the system will hang due to that. To
> workaround the
> > +	 * issue, we will skip the dpm disablement on APUs.
> > +	 *
> > +	 * TODO: a better solution is to reorg the action chains performed on
> suspend and make
> > +	 * the dpm disablement the last one. But that will involve a lot and
> needs MM team's
> > +	 * help.
> > +	 */
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_uvd(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > +			/* shutdown the UVD block */
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = uvd_v6_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > index 67eb01fef789..8aa9d8c07053 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v2_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > index 142e291983b4..b177cd442838 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> >   	 */
> >   	cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > -	if (adev->pm.dpm_enabled) {
> > -		amdgpu_dpm_enable_vce(adev, false);
> > -	} else {
> > -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > -		amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_PG_STATE_GATE);
> > -		amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > -						       AMD_CG_STATE_GATE);
> > +	if (!(adev->flags & AMD_IS_APU)) {
> > +		if (adev->pm.dpm_enabled) {
> > +			amdgpu_dpm_enable_vce(adev, false);
> > +		} else {
> > +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > +			amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > +			amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > +		}
> >   	}
> >
> >   	r = vce_v3_0_hw_fini(adev);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
  2021-11-05  7:58   ` Quan, Evan
@ 2021-11-08 10:26     ` Christian König
  0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2021-11-08 10:26 UTC (permalink / raw)
  To: Quan, Evan, Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander

Am 05.11.21 um 08:58 schrieb Quan, Evan:
> [AMD Official Use Only]
>
>> -----Original Message-----
>> From: Lazar, Lijo <Lijo.Lazar@amd.com>
>> Sent: Thursday, November 4, 2021 4:55 PM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
>> reboot
>>
>>
>>
>> On 11/4/2021 1:49 PM, Evan Quan wrote:
>>> It's confirmed that on some APUs the interaction with SMU about DPM
>>> disablement will power off the UVD completely. Thus the succeeding
>>> interactions with UVD during the reboot will trigger hard hang. To
>>> workaround this issue, we will skip the dpm disablement on APUs.
>>>
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
>>>    drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
>> +++++++++++++++++++--------
>>>    drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
>>>    drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
>>>    4 files changed, 52 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> index c108b8381795..67ec13622e51 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
>>>    	 */
>>>    	cancel_delayed_work_sync(&adev->uvd.idle_work);
>> If the idle work handler had already started execution, it also goes through
>> the same logic. Wouldn't that be a problem? The other case is if decode work
>> is already completed before suspend is called, then also it disables DPM. Not
>> sure how it works then, or is this caused by a second atempt to power off
>> again after idle work is executed?
> [Quan, Evan] Good point. Yes, maybe there should not be 2nd attempt when the target IP is already in the desired state.
> Let me confirm with Boris.

It sounds like you not only need to prevent the clock gating, but also 
enable the clocks during shutdown.

Regards,
Christian.

>
> BR
> Evan
>> Thanks,
>> Lijo
>>
>>> -	if (adev->pm.dpm_enabled) {
>>> -		amdgpu_dpm_enable_uvd(adev, false);
>>> -	} else {
>>> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> -		/* shutdown the UVD block */
>>> -		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> -						       AMD_PG_STATE_GATE);
>>> -		amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> -						       AMD_CG_STATE_GATE);
>>> +	if (!(adev->flags & AMD_IS_APU)) {
>>> +		if (adev->pm.dpm_enabled) {
>>> +			amdgpu_dpm_enable_uvd(adev, false);
>>> +		} else {
>>> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> +			/* shutdown the UVD block */
>>> +			amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_PG_STATE_GATE);
>>> +			amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_CG_STATE_GATE);
>>> +		}
>>>    	}
>>>
>>>    	r = uvd_v4_2_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index 2d558c2f417d..60d05ec8c953 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
>>>    	 */
>>>    	cancel_delayed_work_sync(&adev->uvd.idle_work);
>>>
>>> -	if (adev->pm.dpm_enabled) {
>>> -		amdgpu_dpm_enable_uvd(adev, false);
>>> -	} else {
>>> -		amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> -		/* shutdown the UVD block */
>>> -		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> -						       AMD_PG_STATE_GATE);
>>> -		amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> -						       AMD_CG_STATE_GATE);
>>> +	/*
>>> +	 * It's confirmed that on some APUs the interaction with SMU(about
>> DPM disablement)
>>> +	 * will power off the UVD. That will make the succeeding interactions
>> with UVD on the
>>> +	 * suspend path impossible. And the system will hang due to that. To
>> workaround the
>>> +	 * issue, we will skip the dpm disablement on APUs.
>>> +	 *
>>> +	 * TODO: a better solution is to reorg the action chains performed on
>> suspend and make
>>> +	 * the dpm disablement the last one. But that will involve a lot and
>> needs MM team's
>>> +	 * help.
>>> +	 */
>>> +	if (!(adev->flags & AMD_IS_APU)) {
>>> +		if (adev->pm.dpm_enabled) {
>>> +			amdgpu_dpm_enable_uvd(adev, false);
>>> +		} else {
>>> +			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> +			/* shutdown the UVD block */
>>> +			amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_PG_STATE_GATE);
>>> +			amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_CG_STATE_GATE);
>>> +		}
>>>    	}
>>>
>>>    	r = uvd_v6_0_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> index 67eb01fef789..8aa9d8c07053 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
>>>    	 */
>>>    	cancel_delayed_work_sync(&adev->vce.idle_work);
>>>
>>> -	if (adev->pm.dpm_enabled) {
>>> -		amdgpu_dpm_enable_vce(adev, false);
>>> -	} else {
>>> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> -		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> -						       AMD_PG_STATE_GATE);
>>> -		amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> -						       AMD_CG_STATE_GATE);
>>> +	if (!(adev->flags & AMD_IS_APU)) {
>>> +		if (adev->pm.dpm_enabled) {
>>> +			amdgpu_dpm_enable_vce(adev, false);
>>> +		} else {
>>> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> +			amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_PG_STATE_GATE);
>>> +			amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_CG_STATE_GATE);
>>> +		}
>>>    	}
>>>
>>>    	r = vce_v2_0_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> index 142e291983b4..b177cd442838 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
>>>    	 */
>>>    	cancel_delayed_work_sync(&adev->vce.idle_work);
>>>
>>> -	if (adev->pm.dpm_enabled) {
>>> -		amdgpu_dpm_enable_vce(adev, false);
>>> -	} else {
>>> -		amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> -		amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> -						       AMD_PG_STATE_GATE);
>>> -		amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> -						       AMD_CG_STATE_GATE);
>>> +	if (!(adev->flags & AMD_IS_APU)) {
>>> +		if (adev->pm.dpm_enabled) {
>>> +			amdgpu_dpm_enable_vce(adev, false);
>>> +		} else {
>>> +			amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> +			amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_PG_STATE_GATE);
>>> +			amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_CG_STATE_GATE);
>>> +		}
>>>    	}
>>>
>>>    	r = vce_v3_0_hw_fini(adev);
>>>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-11-08 10:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-04  8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
2021-11-04  8:24 ` Christian König
2021-11-04  8:54 ` Lazar, Lijo
2021-11-05  7:58   ` Quan, Evan
2021-11-08 10:26     ` Christian König
2021-11-05  2:03 ` James Zhu
2021-11-05  8:14   ` Quan, Evan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.