* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-04 8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
@ 2021-11-04 8:24 ` Christian König
2021-11-04 8:54 ` Lazar, Lijo
2021-11-05 2:03 ` James Zhu
2 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2021-11-04 8:24 UTC (permalink / raw)
To: Evan Quan, amd-gfx, Liu, Leo; +Cc: Alexander.Deucher, Lijo.Lazar
Am 04.11.21 um 09:19 schrieb Evan Quan:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
Acked-by: Christian König <christian.koenig@amd.com>
But Leo should take a look as well, adding him on CC.
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> 4 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + /*
> + * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> + * will power off the UVD. That will make the succeeding interactions with UVD on the
> + * suspend path impossible. And the system will hang due to that. To workaround the
> + * issue, we will skip the dpm disablement on APUs.
> + *
> + * TODO: a better solution is to reorg the action chains performed on suspend and make
> + * the dpm disablement the last one. But that will involve a lot and needs MM team's
> + * help.
> + */
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v3_0_hw_fini(adev);
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-04 8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
2021-11-04 8:24 ` Christian König
@ 2021-11-04 8:54 ` Lazar, Lijo
2021-11-05 7:58 ` Quan, Evan
2021-11-05 2:03 ` James Zhu
2 siblings, 1 reply; 7+ messages in thread
From: Lazar, Lijo @ 2021-11-04 8:54 UTC (permalink / raw)
To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher
On 11/4/2021 1:49 PM, Evan Quan wrote:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> ---
> drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> 4 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
If the idle work handler had already started execution, it also goes
through the same logic. Wouldn't that be a problem? The other case is if
decode work is already completed before suspend is called, then also it
disables DPM. Not sure how it works then, or is this caused by a second
atempt to power off again after idle work is executed?
Thanks,
Lijo
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + /*
> + * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> + * will power off the UVD. That will make the succeeding interactions with UVD on the
> + * suspend path impossible. And the system will hang due to that. To workaround the
> + * issue, we will skip the dpm disablement on APUs.
> + *
> + * TODO: a better solution is to reorg the action chains performed on suspend and make
> + * the dpm disablement the last one. But that will involve a lot and needs MM team's
> + * help.
> + */
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v3_0_hw_fini(adev);
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-04 8:54 ` Lazar, Lijo
@ 2021-11-05 7:58 ` Quan, Evan
2021-11-08 10:26 ` Christian König
0 siblings, 1 reply; 7+ messages in thread
From: Quan, Evan @ 2021-11-05 7:58 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander
[AMD Official Use Only]
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Thursday, November 4, 2021 4:55 PM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
> reboot
>
>
>
> On 11/4/2021 1:49 PM, Evan Quan wrote:
> > It's confirmed that on some APUs the interaction with SMU about DPM
> > disablement will power off the UVD completely. Thus the succeeding
> > interactions with UVD during the reboot will trigger hard hang. To
> > workaround this issue, we will skip the dpm disablement on APUs.
> >
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> > ---
> > drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> > drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
> +++++++++++++++++++--------
> > drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> > drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> > 4 files changed, 52 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > index c108b8381795..67ec13622e51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> If the idle work handler had already started execution, it also goes through
> the same logic. Wouldn't that be a problem? The other case is if decode work
> is already completed before suspend is called, then also it disables DPM. Not
> sure how it works then, or is this caused by a second atempt to power off
> again after idle work is executed?
[Quan, Evan] Good point. Yes, maybe there should not be 2nd attempt when the target IP is already in the desired state.
Let me confirm with Boris.
BR
Evan
>
> Thanks,
> Lijo
>
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_uvd(adev, false);
> > - } else {
> > - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > - /* shutdown the UVD block */
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_uvd(adev, false);
> > + } else {
> > + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > + /* shutdown the UVD block */
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = uvd_v4_2_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > index 2d558c2f417d..60d05ec8c953 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_uvd(adev, false);
> > - } else {
> > - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > - /* shutdown the UVD block */
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_CG_STATE_GATE);
> > + /*
> > + * It's confirmed that on some APUs the interaction with SMU(about
> DPM disablement)
> > + * will power off the UVD. That will make the succeeding interactions
> with UVD on the
> > + * suspend path impossible. And the system will hang due to that. To
> workaround the
> > + * issue, we will skip the dpm disablement on APUs.
> > + *
> > + * TODO: a better solution is to reorg the action chains performed on
> suspend and make
> > + * the dpm disablement the last one. But that will involve a lot and
> needs MM team's
> > + * help.
> > + */
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_uvd(adev, false);
> > + } else {
> > + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > + /* shutdown the UVD block */
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = uvd_v6_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > index 67eb01fef789..8aa9d8c07053 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_vce(adev, false);
> > - } else {
> > - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_vce(adev, false);
> > + } else {
> > + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = vce_v2_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > index 142e291983b4..b177cd442838 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_vce(adev, false);
> > - } else {
> > - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_vce(adev, false);
> > + } else {
> > + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = vce_v3_0_hw_fini(adev);
> >
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-05 7:58 ` Quan, Evan
@ 2021-11-08 10:26 ` Christian König
0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2021-11-08 10:26 UTC (permalink / raw)
To: Quan, Evan, Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander
Am 05.11.21 um 08:58 schrieb Quan, Evan:
> [AMD Official Use Only]
>
>> -----Original Message-----
>> From: Lazar, Lijo <Lijo.Lazar@amd.com>
>> Sent: Thursday, November 4, 2021 4:55 PM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
>> reboot
>>
>>
>>
>> On 11/4/2021 1:49 PM, Evan Quan wrote:
>>> It's confirmed that on some APUs the interaction with SMU about DPM
>>> disablement will power off the UVD completely. Thus the succeeding
>>> interactions with UVD during the reboot will trigger hard hang. To
>>> workaround this issue, we will skip the dpm disablement on APUs.
>>>
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
>>> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
>> +++++++++++++++++++--------
>>> drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
>>> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
>>> 4 files changed, 52 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> index c108b8381795..67ec13622e51 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
>>> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
>>> */
>>> cancel_delayed_work_sync(&adev->uvd.idle_work);
>> If the idle work handler had already started execution, it also goes through
>> the same logic. Wouldn't that be a problem? The other case is if decode work
>> is already completed before suspend is called, then also it disables DPM. Not
>> sure how it works then, or is this caused by a second atempt to power off
>> again after idle work is executed?
> [Quan, Evan] Good point. Yes, maybe there should not be 2nd attempt when the target IP is already in the desired state.
> Let me confirm with Boris.
It sounds like you not only need to prevent the clock gating, but also
enable the clocks during shutdown.
Regards,
Christian.
>
> BR
> Evan
>> Thanks,
>> Lijo
>>
>>> - if (adev->pm.dpm_enabled) {
>>> - amdgpu_dpm_enable_uvd(adev, false);
>>> - } else {
>>> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> - /* shutdown the UVD block */
>>> - amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> - AMD_PG_STATE_GATE);
>>> - amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> - AMD_CG_STATE_GATE);
>>> + if (!(adev->flags & AMD_IS_APU)) {
>>> + if (adev->pm.dpm_enabled) {
>>> + amdgpu_dpm_enable_uvd(adev, false);
>>> + } else {
>>> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> + /* shutdown the UVD block */
>>> + amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_PG_STATE_GATE);
>>> + amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_CG_STATE_GATE);
>>> + }
>>> }
>>>
>>> r = uvd_v4_2_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index 2d558c2f417d..60d05ec8c953 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
>>> */
>>> cancel_delayed_work_sync(&adev->uvd.idle_work);
>>>
>>> - if (adev->pm.dpm_enabled) {
>>> - amdgpu_dpm_enable_uvd(adev, false);
>>> - } else {
>>> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> - /* shutdown the UVD block */
>>> - amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> - AMD_PG_STATE_GATE);
>>> - amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> - AMD_CG_STATE_GATE);
>>> + /*
>>> + * It's confirmed that on some APUs the interaction with SMU(about
>> DPM disablement)
>>> + * will power off the UVD. That will make the succeeding interactions
>> with UVD on the
>>> + * suspend path impossible. And the system will hang due to that. To
>> workaround the
>>> + * issue, we will skip the dpm disablement on APUs.
>>> + *
>>> + * TODO: a better solution is to reorg the action chains performed on
>> suspend and make
>>> + * the dpm disablement the last one. But that will involve a lot and
>> needs MM team's
>>> + * help.
>>> + */
>>> + if (!(adev->flags & AMD_IS_APU)) {
>>> + if (adev->pm.dpm_enabled) {
>>> + amdgpu_dpm_enable_uvd(adev, false);
>>> + } else {
>>> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
>>> + /* shutdown the UVD block */
>>> + amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_PG_STATE_GATE);
>>> + amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_UVD,
>>> +
>> AMD_CG_STATE_GATE);
>>> + }
>>> }
>>>
>>> r = uvd_v6_0_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> index 67eb01fef789..8aa9d8c07053 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
>>> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
>>> */
>>> cancel_delayed_work_sync(&adev->vce.idle_work);
>>>
>>> - if (adev->pm.dpm_enabled) {
>>> - amdgpu_dpm_enable_vce(adev, false);
>>> - } else {
>>> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> - amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> - AMD_PG_STATE_GATE);
>>> - amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> - AMD_CG_STATE_GATE);
>>> + if (!(adev->flags & AMD_IS_APU)) {
>>> + if (adev->pm.dpm_enabled) {
>>> + amdgpu_dpm_enable_vce(adev, false);
>>> + } else {
>>> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> + amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_PG_STATE_GATE);
>>> + amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_CG_STATE_GATE);
>>> + }
>>> }
>>>
>>> r = vce_v2_0_hw_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> index 142e291983b4..b177cd442838 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
>>> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
>>> */
>>> cancel_delayed_work_sync(&adev->vce.idle_work);
>>>
>>> - if (adev->pm.dpm_enabled) {
>>> - amdgpu_dpm_enable_vce(adev, false);
>>> - } else {
>>> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> - amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> - AMD_PG_STATE_GATE);
>>> - amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> - AMD_CG_STATE_GATE);
>>> + if (!(adev->flags & AMD_IS_APU)) {
>>> + if (adev->pm.dpm_enabled) {
>>> + amdgpu_dpm_enable_vce(adev, false);
>>> + } else {
>>> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
>>> + amdgpu_device_ip_set_powergating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_PG_STATE_GATE);
>>> + amdgpu_device_ip_set_clockgating_state(adev,
>> AMD_IP_BLOCK_TYPE_VCE,
>>> +
>> AMD_CG_STATE_GATE);
>>> + }
>>> }
>>>
>>> r = vce_v3_0_hw_fini(adev);
>>>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-04 8:19 [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot Evan Quan
2021-11-04 8:24 ` Christian König
2021-11-04 8:54 ` Lazar, Lijo
@ 2021-11-05 2:03 ` James Zhu
2021-11-05 8:14 ` Quan, Evan
2 siblings, 1 reply; 7+ messages in thread
From: James Zhu @ 2021-11-05 2:03 UTC (permalink / raw)
To: amd-gfx
On 2021-11-04 4:19 a.m., Evan Quan wrote:
> It's confirmed that on some APUs the interaction with SMU about DPM disablement
> will power off the UVD completely. Thus the succeeding interactions with UVD
> during the reboot will trigger hard hang. To workaround this issue, we will skip
> the dpm disablement on APUs.
>
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> ---
> drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30 +++++++++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> 4 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index c108b8381795..67ec13622e51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
[JZ] Hi Evan, VCN code put amdgpu_dpm_enable_uvd(false) at the end of
stop. Can we do the same for uvd/vce?
Here, it is possible that some dec/enc jobs are still running when dpm
is called. I am not sure if this situation caused hard hang during reboot.
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v4_2_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 2d558c2f417d..60d05ec8c953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->uvd.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_uvd(adev, false);
> - } else {
> - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> - /* shutdown the UVD block */
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> - AMD_CG_STATE_GATE);
> + /*
> + * It's confirmed that on some APUs the interaction with SMU(about DPM disablement)
> + * will power off the UVD. That will make the succeeding interactions with UVD on the
> + * suspend path impossible. And the system will hang due to that. To workaround the
> + * issue, we will skip the dpm disablement on APUs.
> + *
> + * TODO: a better solution is to reorg the action chains performed on suspend and make
> + * the dpm disablement the last one. But that will involve a lot and needs MM team's
> + * help.
> + */
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_uvd(adev, false);
> + } else {
> + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> + /* shutdown the UVD block */
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = uvd_v6_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index 67eb01fef789..8aa9d8c07053 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v2_0_hw_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 142e291983b4..b177cd442838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> */
> cancel_delayed_work_sync(&adev->vce.idle_work);
>
> - if (adev->pm.dpm_enabled) {
> - amdgpu_dpm_enable_vce(adev, false);
> - } else {
> - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_PG_STATE_GATE);
> - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> - AMD_CG_STATE_GATE);
> + if (!(adev->flags & AMD_IS_APU)) {
> + if (adev->pm.dpm_enabled) {
> + amdgpu_dpm_enable_vce(adev, false);
> + } else {
> + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_PG_STATE_GATE);
> + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
> + AMD_CG_STATE_GATE);
> + }
> }
>
> r = vce_v3_0_hw_fini(adev);
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system reboot
2021-11-05 2:03 ` James Zhu
@ 2021-11-05 8:14 ` Quan, Evan
0 siblings, 0 replies; 7+ messages in thread
From: Quan, Evan @ 2021-11-05 8:14 UTC (permalink / raw)
To: Zhu, James, amd-gfx
[AMD Official Use Only]
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> James Zhu
> Sent: Friday, November 5, 2021 10:03 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: fix the Carrizo UVD hang on system
> reboot
>
>
> On 2021-11-04 4:19 a.m., Evan Quan wrote:
> > It's confirmed that on some APUs the interaction with SMU about DPM
> > disablement will power off the UVD completely. Thus the succeeding
> > interactions with UVD during the reboot will trigger hard hang. To
> > workaround this issue, we will skip the dpm disablement on APUs.
> >
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I4340cc2fb0fd94f439cbac5d4963fe920866bc13
> > ---
> > drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 20 ++++++++++--------
> > drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 30
> +++++++++++++++++++--------
> > drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 18 +++++++++-------
> > drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 18 +++++++++-------
> > 4 files changed, 52 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > index c108b8381795..67ec13622e51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> > @@ -238,15 +238,17 @@ static int uvd_v4_2_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_uvd(adev, false);
>
> [JZ] Hi Evan, VCN code put amdgpu_dpm_enable_uvd(false) at the end of
> stop. Can we do the same for uvd/vce?
[Quan, Evan] Sounds reasonable to me. Actually Lijo provided some insights which enlightened me.
I will drop this patch and provide a new one.
BR
Evan
>
> Here, it is possible that some dec/enc jobs are still running when dpm is
> called. I am not sure if this situation caused hard hang during reboot.
>
> > - } else {
> > - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > - /* shutdown the UVD block */
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_uvd(adev, false);
> > + } else {
> > + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > + /* shutdown the UVD block */
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = uvd_v4_2_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > index 2d558c2f417d..60d05ec8c953 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> > @@ -560,15 +560,27 @@ static int uvd_v6_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->uvd.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_uvd(adev, false);
> > - } else {
> > - amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > - /* shutdown the UVD block */
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > - AMD_CG_STATE_GATE);
> > + /*
> > + * It's confirmed that on some APUs the interaction with SMU(about
> DPM disablement)
> > + * will power off the UVD. That will make the succeeding interactions
> with UVD on the
> > + * suspend path impossible. And the system will hang due to that. To
> workaround the
> > + * issue, we will skip the dpm disablement on APUs.
> > + *
> > + * TODO: a better solution is to reorg the action chains performed on
> suspend and make
> > + * the dpm disablement the last one. But that will involve a lot and
> needs MM team's
> > + * help.
> > + */
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_uvd(adev, false);
> > + } else {
> > + amdgpu_asic_set_uvd_clocks(adev, 0, 0);
> > + /* shutdown the UVD block */
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_UVD,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = uvd_v6_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > index 67eb01fef789..8aa9d8c07053 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> > @@ -505,14 +505,16 @@ static int vce_v2_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_vce(adev, false);
> > - } else {
> > - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_vce(adev, false);
> > + } else {
> > + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = vce_v2_0_hw_fini(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > index 142e291983b4..b177cd442838 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> > @@ -520,14 +520,16 @@ static int vce_v3_0_suspend(void *handle)
> > */
> > cancel_delayed_work_sync(&adev->vce.idle_work);
> >
> > - if (adev->pm.dpm_enabled) {
> > - amdgpu_dpm_enable_vce(adev, false);
> > - } else {
> > - amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > - amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_PG_STATE_GATE);
> > - amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > - AMD_CG_STATE_GATE);
> > + if (!(adev->flags & AMD_IS_APU)) {
> > + if (adev->pm.dpm_enabled) {
> > + amdgpu_dpm_enable_vce(adev, false);
> > + } else {
> > + amdgpu_asic_set_vce_clocks(adev, 0, 0);
> > + amdgpu_device_ip_set_powergating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_PG_STATE_GATE);
> > + amdgpu_device_ip_set_clockgating_state(adev,
> AMD_IP_BLOCK_TYPE_VCE,
> > +
> AMD_CG_STATE_GATE);
> > + }
> > }
> >
> > r = vce_v3_0_hw_fini(adev);
^ permalink raw reply [flat|nested] 7+ messages in thread