* [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI @ 2022-04-11 8:54 Evan Quan 2022-04-11 10:06 ` Lazar, Lijo 2022-04-21 8:29 ` Thorsten Leemhuis 0 siblings, 2 replies; 9+ messages in thread From: Evan Quan @ 2022-04-11 8:54 UTC (permalink / raw) To: amd-gfx; +Cc: Alexander.Deucher, pmenzel, Evan Quan, arthur.marsh The adev->pm.mutex is already held at the beginning of amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_enable_vce. But on their calling path, amdgpu_display_bandwidth_update will be called and thus its subfunctions amdgpu_dpm_get_sclk/mclk. They will then try to acquire the same adev->pm.mutex and deadlock will occur. By placing amdgpu_display_bandwidth_update outside of adev->pm.mutex protection (considering logically they do not need such protection) and restructuring the call flow accordingly, we can eliminate the deadlock issue. This comes with no real logic change. Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c") Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> Link: https://lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de/ BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957 Signed-off-by: Evan Quan <evan.quan@amd.com> -- v1->v2: - enrich the commit message (Paul) --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 +++++++++++++++++++ .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- 4 files changed, 39 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5504d81c77b7..72e7b5d40af6 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int i; if (!adev->pm.dpm_enabled) return; @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) if (!pp_funcs->pm_compute_clocks) return; + if (adev->mode_info.num_crtc) + amdgpu_display_bandwidth_update(adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) + amdgpu_fence_wait_empty(ring); + } + mutex_lock(&adev->pm.mutex); pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); mutex_unlock(&adev->pm.mutex); @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); if (ret) DRM_ERROR("Dpm %s uvd failed, ret = %d. 
\n", @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); if (ret) DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 9613c6181c17..d3fe149d8476 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_legacy_dpm_compute_clocks(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } amdgpu_dpm_get_active_displays(adev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index caae54487f9c..633dab14f51c 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev) } #endif -static int si_set_powergating_by_smu(void *handle, - uint32_t block_type, - bool gate) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (block_type) { - case AMD_IP_BLOCK_TYPE_UVD: - if (!gate) { - adev->pm.dpm.uvd_active = true; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; - } else { - adev->pm.dpm.uvd_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - case AMD_IP_BLOCK_TYPE_VCE: - if (!gate) { - adev->pm.dpm.vce_active = true; - /* XXX select vce level based on ring/task */ - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; - } else { - adev->pm.dpm.vce_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - default: - break; - } - return 0; -} - static int si_set_sw_state(struct amdgpu_device *adev) { return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
@@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = { .print_power_state = &si_dpm_print_power_state, .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level, .force_performance_level = &si_dpm_force_performance_level, - .set_powergating_by_smu = &si_set_powergating_by_smu, .vblank_too_short = &si_dpm_vblank_too_short, .set_fan_control_mode = &si_dpm_set_fan_control_mode, .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index dbed72c1e0c6..1eb4e613b27a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle) { struct pp_hwmgr *hwmgr = handle; struct amdgpu_device *adev = hwmgr->adev; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } if (!amdgpu_device_has_dc_support(adev)) { amdgpu_dpm_get_active_displays(adev); -- 2.29.0 ^ permalink raw reply related [flat|nested] 9+ messages in thread
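For illustration, the bug described in the commit message is a plain self-deadlock on a non-recursive mutex: the thread already holding adev->pm.mutex re-enters a path (amdgpu_dpm_compute_clocks -> amdgpu_display_bandwidth_update -> amdgpu_dpm_get_sclk/mclk) that tries to lock it again. The following stand-alone user-space sketch only mimics that call chain; it is not the driver code. It uses a PTHREAD_MUTEX_ERRORCHECK mutex so the program reports the deadlock instead of hanging the way the kernel's non-recursive struct mutex does:

/* deadlock_demo.c - build with: gcc -Wall -o deadlock_demo deadlock_demo.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for adev->pm.mutex (error-checking so the bug is reported, not hung on). */
static pthread_mutex_t pm_mutex;

/* Stand-in for amdgpu_dpm_get_sclk(): takes pm_mutex on its own. */
static int dpm_get_sclk(unsigned int *sclk)
{
	int err = pthread_mutex_lock(&pm_mutex);

	if (err)
		return err;		/* EDEADLK: caller already holds pm_mutex */
	*sclk = 800;			/* pretend clock value */
	pthread_mutex_unlock(&pm_mutex);
	return 0;
}

/* Stand-in for amdgpu_display_bandwidth_update(): calls the getter. */
static int display_bandwidth_update(void)
{
	unsigned int sclk;

	return dpm_get_sclk(&sclk);
}

/* Stand-in for the pre-fix amdgpu_dpm_compute_clocks(): lock first, then call
 * into the display path, so the same mutex is requested again by the same thread. */
static int compute_clocks_old(void)
{
	int err;

	pthread_mutex_lock(&pm_mutex);
	err = display_bandwidth_update();	/* nested lock attempt */
	pthread_mutex_unlock(&pm_mutex);
	return err;
}

int main(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&pm_mutex, &attr);

	int err = compute_clocks_old();
	printf("nested lock attempt: %s\n", err ? strerror(err) : "ok");
	return 0;
}

With a normal (non-error-checking) mutex, which is how the kernel's struct mutex behaves, the nested lock simply blocks forever, matching the hang reported in the linked bug.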
* Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-11 8:54 [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan @ 2022-04-11 10:06 ` Lazar, Lijo 2022-04-22 8:27 ` Quan, Evan 2022-04-21 8:29 ` Thorsten Leemhuis 1 sibling, 1 reply; 9+ messages in thread From: Lazar, Lijo @ 2022-04-11 10:06 UTC (permalink / raw) To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher, pmenzel, arthur.marsh On 4/11/2022 2:24 PM, Evan Quan wrote: > The adev->pm.mutx is already held at the beginning of > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_enable_vce. > But on their calling path, amdgpu_display_bandwidth_update will be > called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They > will then try to acquire the same adev->pm.mutex and deadlock will > occur. > What about using locked versions of get_sclk/mclk and leave the rest as they are? Thanks, Lijo > By placing amdgpu_display_bandwidth_update outside of adev->pm.mutex > protection(considering logically they do not need such protection) and > restructuring the call flow accordingly, we can eliminate the deadlock > issue. This comes with no real logics change. > > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c") > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > Link: https: //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de/ > BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 > Signed-off-by: Evan Quan <evan.quan@amd.com> > -- > v1->v2: > - rich the commit messages(Paul) > --- > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 +++++++++++++++++++ > .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- > .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > 4 files changed, 39 insertions(+), 55 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > index 5504d81c77b7..72e7b5d40af6 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso > void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > { > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > + int i; > > if (!adev->pm.dpm_enabled) > return; > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > if (!pp_funcs->pm_compute_clocks) > return; > > + if (adev->mode_info.num_crtc) > + amdgpu_display_bandwidth_update(adev); > + > + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > + struct amdgpu_ring *ring = adev->rings[i]; > + if (ring && ring->sched.ready) > + amdgpu_fence_wait_empty(ring); > + } > + > mutex_lock(&adev->pm.mutex); > pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); > mutex_unlock(&adev->pm.mutex); > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) > { > int ret = 0; > > + if (adev->family == AMDGPU_FAMILY_SI) { > + mutex_lock(&adev->pm.mutex); > + if (enable) { > + adev->pm.dpm.uvd_active = true; > + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; > + } else { > + adev->pm.dpm.uvd_active = false; > + } > + mutex_unlock(&adev->pm.mutex); > + > + amdgpu_dpm_compute_clocks(adev); > + return; > + } > + > ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); > if (ret) > DRM_ERROR("Dpm %s uvd failed, ret = %d. 
\n", > @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) > { > int ret = 0; > > + if (adev->family == AMDGPU_FAMILY_SI) { > + mutex_lock(&adev->pm.mutex); > + if (enable) { > + adev->pm.dpm.vce_active = true; > + /* XXX select vce level based on ring/task */ > + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; > + } else { > + adev->pm.dpm.vce_active = false; > + } > + mutex_unlock(&adev->pm.mutex); > + > + amdgpu_dpm_compute_clocks(adev); > + return; > + } > + > ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); > if (ret) > DRM_ERROR("Dpm %s vce failed, ret = %d. \n", > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > index 9613c6181c17..d3fe149d8476 100644 > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) > void amdgpu_legacy_dpm_compute_clocks(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - int i = 0; > - > - if (adev->mode_info.num_crtc) > - amdgpu_display_bandwidth_update(adev); > - > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > - struct amdgpu_ring *ring = adev->rings[i]; > - if (ring && ring->sched.ready) > - amdgpu_fence_wait_empty(ring); > - } > > amdgpu_dpm_get_active_displays(adev); > > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > index caae54487f9c..633dab14f51c 100644 > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev) > } > #endif > > -static int si_set_powergating_by_smu(void *handle, > - uint32_t block_type, > - bool gate) > -{ > - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - > - switch (block_type) { > - case AMD_IP_BLOCK_TYPE_UVD: > - if (!gate) { > - adev->pm.dpm.uvd_active = true; > - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; > - } else { > - adev->pm.dpm.uvd_active = false; > - } > - > - amdgpu_legacy_dpm_compute_clocks(handle); > - break; > - case AMD_IP_BLOCK_TYPE_VCE: > - if (!gate) { > - adev->pm.dpm.vce_active = true; > - /* XXX select vce level based on ring/task */ > - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; > - } else { > - adev->pm.dpm.vce_active = false; > - } > - > - amdgpu_legacy_dpm_compute_clocks(handle); > - break; > - default: > - break; > - } > - return 0; > -} > - > static int si_set_sw_state(struct amdgpu_device *adev) > { > return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = { > .print_power_state = &si_dpm_print_power_state, > .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level, > .force_performance_level = &si_dpm_force_performance_level, > - .set_powergating_by_smu = &si_set_powergating_by_smu, > .vblank_too_short = &si_dpm_vblank_too_short, > .set_fan_control_mode = &si_dpm_set_fan_control_mode, > .get_fan_control_mode = &si_dpm_get_fan_control_mode, > diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > index dbed72c1e0c6..1eb4e613b27a 100644 > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle) > { > struct pp_hwmgr *hwmgr = handle; > struct amdgpu_device *adev = hwmgr->adev; > - int i = 0; > - > - if (adev->mode_info.num_crtc) > - amdgpu_display_bandwidth_update(adev); > - > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > - struct amdgpu_ring *ring = adev->rings[i]; > - if (ring && ring->sched.ready) > - amdgpu_fence_wait_empty(ring); > - } > > if (!amdgpu_device_has_dc_support(adev)) { > amdgpu_dpm_get_active_displays(adev); > ^ permalink raw reply [flat|nested] 9+ messages in thread
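A rough sketch of the alternative suggested above: keep the existing call flow and add getter variants that assume the caller already holds adev->pm.mutex. This was never implemented; the helper name, the exact signatures, and the callback shape below are approximations for illustration only, not actual amdgpu code, and the snippet is a kernel-side fragment rather than something that builds on its own:

/* Hypothetical "caller already holds pm.mutex" variant (not in the driver). */
static int amdgpu_dpm_get_sclk_locked(struct amdgpu_device *adev, bool low)
{
	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

	/* Document and enforce the locking assumption instead of re-locking. */
	lockdep_assert_held(&adev->pm.mutex);

	if (!pp_funcs->get_sclk)
		return 0;

	return pp_funcs->get_sclk(adev->powerplay.pp_handle, low);
}

/* Wrapper in the existing style for callers that do not hold the mutex. */
int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
{
	int clk;

	mutex_lock(&adev->pm.mutex);
	clk = amdgpu_dpm_get_sclk_locked(adev, low);
	mutex_unlock(&adev->pm.mutex);

	return clk;
}

As the follow-up below notes, such helpers would have had only the one SI/legacy user, which is why reverting to the old call order was preferred instead.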
* RE: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-11 10:06 ` Lazar, Lijo @ 2022-04-22 8:27 ` Quan, Evan 2022-04-22 8:36 ` Lazar, Lijo 0 siblings, 1 reply; 9+ messages in thread From: Quan, Evan @ 2022-04-22 8:27 UTC (permalink / raw) To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander, pmenzel, arthur.marsh [AMD Official Use Only] > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@amd.com> > Sent: Monday, April 11, 2022 6:06 PM > To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI > > > > On 4/11/2022 2:24 PM, Evan Quan wrote: > > The adev->pm.mutx is already held at the beginning of > > > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ > enable_vce. > > But on their calling path, amdgpu_display_bandwidth_update will be > > called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They will > > then try to acquire the same adev->pm.mutex and deadlock will occur. > > > > What about using locked versions of get_sclk/mclk and leave the rest as they > are? [Quan, Evan] Yeah, that(adding two new APIs for locked versions of get_sclk/mclk) should also work. But considering there cannot be other ASICs who need them, they are kind of redundant. Meanwhile my version is just to get everything reverted back to its original. So, it should be quite safe. How do you think? BR Evan > > Thanks, > Lijo > > > By placing amdgpu_display_bandwidth_update outside of adev- > >pm.mutex > > protection(considering logically they do not need such protection) and > > restructuring the call flow accordingly, we can eliminate the deadlock > > issue. This comes with no real logics change. > > > > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in > > amdgpu_dpm.c") > > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > > Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > > Link: https: > > //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg. 
> > de/ > > BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 > > Signed-off-by: Evan Quan <evan.quan@amd.com> > > -- > > v1->v2: > > - rich the commit messages(Paul) > > --- > > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 > +++++++++++++++++++ > > .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > > drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- > > .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > > 4 files changed, 39 insertions(+), 55 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > index 5504d81c77b7..72e7b5d40af6 100644 > > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct > amdgpu_device *adev, enum amd_pp_sensors senso > > void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > > { > > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > > + int i; > > > > if (!adev->pm.dpm_enabled) > > return; > > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct > amdgpu_device *adev) > > if (!pp_funcs->pm_compute_clocks) > > return; > > > > + if (adev->mode_info.num_crtc) > > + amdgpu_display_bandwidth_update(adev); > > + > > + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > + struct amdgpu_ring *ring = adev->rings[i]; > > + if (ring && ring->sched.ready) > > + amdgpu_fence_wait_empty(ring); > > + } > > + > > mutex_lock(&adev->pm.mutex); > > pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); > > mutex_unlock(&adev->pm.mutex); > > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct > amdgpu_device *adev, bool enable) > > { > > int ret = 0; > > > > + if (adev->family == AMDGPU_FAMILY_SI) { > > + mutex_lock(&adev->pm.mutex); > > + if (enable) { > > + adev->pm.dpm.uvd_active = true; > > + adev->pm.dpm.state = > POWER_STATE_TYPE_INTERNAL_UVD; > > + } else { > > + adev->pm.dpm.uvd_active = false; > > + } > > + mutex_unlock(&adev->pm.mutex); > > + > > + amdgpu_dpm_compute_clocks(adev); > > + return; > > + } > > + > > ret = amdgpu_dpm_set_powergating_by_smu(adev, > AMD_IP_BLOCK_TYPE_UVD, !enable); > > if (ret) > > DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 > +477,21 @@ > > void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) > > { > > int ret = 0; > > > > + if (adev->family == AMDGPU_FAMILY_SI) { > > + mutex_lock(&adev->pm.mutex); > > + if (enable) { > > + adev->pm.dpm.vce_active = true; > > + /* XXX select vce level based on ring/task */ > > + adev->pm.dpm.vce_level = > AMD_VCE_LEVEL_AC_ALL; > > + } else { > > + adev->pm.dpm.vce_active = false; > > + } > > + mutex_unlock(&adev->pm.mutex); > > + > > + amdgpu_dpm_compute_clocks(adev); > > + return; > > + } > > + > > ret = amdgpu_dpm_set_powergating_by_smu(adev, > AMD_IP_BLOCK_TYPE_VCE, !enable); > > if (ret) > > DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", diff --git > > a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > index 9613c6181c17..d3fe149d8476 100644 > > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > @@ -1028,16 +1028,6 @@ static int > amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) > > void amdgpu_legacy_dpm_compute_clocks(void *handle) > > { > > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - int i = 0; > > - > > - if (adev->mode_info.num_crtc) > > - amdgpu_display_bandwidth_update(adev); > > - > > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > - struct amdgpu_ring *ring = adev->rings[i]; > > - if (ring && ring->sched.ready) > > - amdgpu_fence_wait_empty(ring); > > - } > > > > amdgpu_dpm_get_active_displays(adev); > > > > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > index caae54487f9c..633dab14f51c 100644 > > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct > amdgpu_device *adev) > > } > > #endif > > > > -static int si_set_powergating_by_smu(void *handle, > > - uint32_t block_type, > > - bool gate) > > -{ > > - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - > > - switch (block_type) { > > - case AMD_IP_BLOCK_TYPE_UVD: > > - if (!gate) { > > - adev->pm.dpm.uvd_active = true; > > - adev->pm.dpm.state = > POWER_STATE_TYPE_INTERNAL_UVD; > > - } else { > > - adev->pm.dpm.uvd_active = false; > > - } > > - > > - amdgpu_legacy_dpm_compute_clocks(handle); > > - break; > > - case AMD_IP_BLOCK_TYPE_VCE: > > - if (!gate) { > > - adev->pm.dpm.vce_active = true; > > - /* XXX select vce level based on ring/task */ > > - adev->pm.dpm.vce_level = > AMD_VCE_LEVEL_AC_ALL; > > - } else { > > - adev->pm.dpm.vce_active = false; > > - } > > - > > - amdgpu_legacy_dpm_compute_clocks(handle); > > - break; > > - default: > > - break; > > - } > > - return 0; > > -} > > - > > static int si_set_sw_state(struct amdgpu_device *adev) > > { > > return (amdgpu_si_send_msg_to_smc(adev, > PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
> > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs > = { > > .print_power_state = &si_dpm_print_power_state, > > .debugfs_print_current_performance_level = > &si_dpm_debugfs_print_current_performance_level, > > .force_performance_level = &si_dpm_force_performance_level, > > - .set_powergating_by_smu = &si_set_powergating_by_smu, > > .vblank_too_short = &si_dpm_vblank_too_short, > > .set_fan_control_mode = &si_dpm_set_fan_control_mode, > > .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff -- > git > > a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > index dbed72c1e0c6..1eb4e613b27a 100644 > > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void > *handle) > > { > > struct pp_hwmgr *hwmgr = handle; > > struct amdgpu_device *adev = hwmgr->adev; > > - int i = 0; > > - > > - if (adev->mode_info.num_crtc) > > - amdgpu_display_bandwidth_update(adev); > > - > > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > - struct amdgpu_ring *ring = adev->rings[i]; > > - if (ring && ring->sched.ready) > > - amdgpu_fence_wait_empty(ring); > > - } > > > > if (!amdgpu_device_has_dc_support(adev)) { > > amdgpu_dpm_get_active_displays(adev); > > ^ permalink raw reply [flat|nested] 9+ messages in thread
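As a companion to the earlier user-space sketch, the same program reordered the way the patch reorders the driver: the work that internally takes the mutex (the bandwidth-update stand-in) runs before the mutex is taken, and only the state that actually needs pm.mutex-style protection is touched under the lock. Again, these are simplified stand-ins, not driver code:

/* compute_clocks_fixed.c - gcc -Wall -o compute_clocks_fixed compute_clocks_fixed.c -lpthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pm_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int dpm_state;		/* stand-in for adev->pm.dpm.* state */

/* Stand-in getter: still takes the mutex itself, unchanged by the fix. */
static unsigned int dpm_get_sclk(void)
{
	unsigned int sclk;

	pthread_mutex_lock(&pm_mutex);
	sclk = 800;
	pthread_mutex_unlock(&pm_mutex);
	return sclk;
}

/* Stand-in for amdgpu_display_bandwidth_update(): locks on its own, so it
 * must not be called with pm_mutex already held. */
static void display_bandwidth_update(void)
{
	printf("bandwidth update based on sclk %u\n", dpm_get_sclk());
}

/* Shape of the fixed amdgpu_dpm_compute_clocks(): unlocked work first,
 * then the lock only around the state that needs it. */
static void compute_clocks_fixed(void)
{
	display_bandwidth_update();	/* outside the lock, as in the patch */

	pthread_mutex_lock(&pm_mutex);
	dpm_state = 1;			/* stand-in for pm_compute_clocks() */
	pthread_mutex_unlock(&pm_mutex);
}

int main(void)
{
	compute_clocks_fixed();
	printf("dpm_state = %u, no nested locking\n", dpm_state);
	return 0;
}

The SI branches added to amdgpu_dpm_enable_uvd/vce in the patch follow the same shape: flip the dpm state under the mutex, drop it, and only then call amdgpu_dpm_compute_clocks(), which does its own locking.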
* Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-22 8:27 ` Quan, Evan @ 2022-04-22 8:36 ` Lazar, Lijo 2022-04-22 8:53 ` Quan, Evan 0 siblings, 1 reply; 9+ messages in thread From: Lazar, Lijo @ 2022-04-22 8:36 UTC (permalink / raw) To: Quan, Evan, amd-gfx; +Cc: Deucher, Alexander, pmenzel, arthur.marsh On 4/22/2022 1:57 PM, Quan, Evan wrote: > [AMD Official Use Only] > > > >> -----Original Message----- >> From: Lazar, Lijo <Lijo.Lazar@amd.com> >> Sent: Monday, April 11, 2022 6:06 PM >> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; >> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net >> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI >> >> >> >> On 4/11/2022 2:24 PM, Evan Quan wrote: >>> The adev->pm.mutx is already held at the beginning of >>> >> amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ >> enable_vce. >>> But on their calling path, amdgpu_display_bandwidth_update will be >>> called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They will >>> then try to acquire the same adev->pm.mutex and deadlock will occur. >>> >> >> What about using locked versions of get_sclk/mclk and leave the rest as they >> are? > [Quan, Evan] Yeah, that(adding two new APIs for locked versions of get_sclk/mclk) should also work. But considering there cannot be other ASICs who need them, they are kind of redundant. > Meanwhile my version is just to get everything reverted back to its original. So, it should be quite safe. How do you think? > As far as I see - 1) It moves display watermark updates outside of locking 2) It adds bandwidth update under common compute_clocks paths. I guess it is not the same as pre-mutex change. Thanks, Lijo > BR > Evan >> >> Thanks, >> Lijo >> >>> By placing amdgpu_display_bandwidth_update outside of adev- >>> pm.mutex >>> protection(considering logically they do not need such protection) and >>> restructuring the call flow accordingly, we can eliminate the deadlock >>> issue. This comes with no real logics change. >>> >>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in >>> amdgpu_dpm.c") >>> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> >>> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> >>> Link: https: >>> //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg. 
>>> de/ >>> BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 >>> Signed-off-by: Evan Quan <evan.quan@amd.com> >>> -- >>> v1->v2: >>> - rich the commit messages(Paul) >>> --- >>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 >> +++++++++++++++++++ >>> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- >>> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- >>> .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- >>> 4 files changed, 39 insertions(+), 55 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>> index 5504d81c77b7..72e7b5d40af6 100644 >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct >> amdgpu_device *adev, enum amd_pp_sensors senso >>> void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) >>> { >>> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; >>> + int i; >>> >>> if (!adev->pm.dpm_enabled) >>> return; >>> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct >> amdgpu_device *adev) >>> if (!pp_funcs->pm_compute_clocks) >>> return; >>> >>> + if (adev->mode_info.num_crtc) >>> + amdgpu_display_bandwidth_update(adev); >>> + >>> + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>> + struct amdgpu_ring *ring = adev->rings[i]; >>> + if (ring && ring->sched.ready) >>> + amdgpu_fence_wait_empty(ring); >>> + } >>> + >>> mutex_lock(&adev->pm.mutex); >>> pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); >>> mutex_unlock(&adev->pm.mutex); >>> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct >> amdgpu_device *adev, bool enable) >>> { >>> int ret = 0; >>> >>> + if (adev->family == AMDGPU_FAMILY_SI) { >>> + mutex_lock(&adev->pm.mutex); >>> + if (enable) { >>> + adev->pm.dpm.uvd_active = true; >>> + adev->pm.dpm.state = >> POWER_STATE_TYPE_INTERNAL_UVD; >>> + } else { >>> + adev->pm.dpm.uvd_active = false; >>> + } >>> + mutex_unlock(&adev->pm.mutex); >>> + >>> + amdgpu_dpm_compute_clocks(adev); >>> + return; >>> + } >>> + >>> ret = amdgpu_dpm_set_powergating_by_smu(adev, >> AMD_IP_BLOCK_TYPE_UVD, !enable); >>> if (ret) >>> DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 >> +477,21 @@ >>> void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) >>> { >>> int ret = 0; >>> >>> + if (adev->family == AMDGPU_FAMILY_SI) { >>> + mutex_lock(&adev->pm.mutex); >>> + if (enable) { >>> + adev->pm.dpm.vce_active = true; >>> + /* XXX select vce level based on ring/task */ >>> + adev->pm.dpm.vce_level = >> AMD_VCE_LEVEL_AC_ALL; >>> + } else { >>> + adev->pm.dpm.vce_active = false; >>> + } >>> + mutex_unlock(&adev->pm.mutex); >>> + >>> + amdgpu_dpm_compute_clocks(adev); >>> + return; >>> + } >>> + >>> ret = amdgpu_dpm_set_powergating_by_smu(adev, >> AMD_IP_BLOCK_TYPE_VCE, !enable); >>> if (ret) >>> DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", diff --git >>> a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>> b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>> index 9613c6181c17..d3fe149d8476 100644 >>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>> @@ -1028,16 +1028,6 @@ static int >> amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) >>> void amdgpu_legacy_dpm_compute_clocks(void *handle) >>> { >>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>> - int i = 0; >>> - >>> - if (adev->mode_info.num_crtc) >>> - amdgpu_display_bandwidth_update(adev); >>> - >>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>> - struct amdgpu_ring *ring = adev->rings[i]; >>> - if (ring && ring->sched.ready) >>> - amdgpu_fence_wait_empty(ring); >>> - } >>> >>> amdgpu_dpm_get_active_displays(adev); >>> >>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>> b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>> index caae54487f9c..633dab14f51c 100644 >>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct >> amdgpu_device *adev) >>> } >>> #endif >>> >>> -static int si_set_powergating_by_smu(void *handle, >>> - uint32_t block_type, >>> - bool gate) >>> -{ >>> - struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>> - >>> - switch (block_type) { >>> - case AMD_IP_BLOCK_TYPE_UVD: >>> - if (!gate) { >>> - adev->pm.dpm.uvd_active = true; >>> - adev->pm.dpm.state = >> POWER_STATE_TYPE_INTERNAL_UVD; >>> - } else { >>> - adev->pm.dpm.uvd_active = false; >>> - } >>> - >>> - amdgpu_legacy_dpm_compute_clocks(handle); >>> - break; >>> - case AMD_IP_BLOCK_TYPE_VCE: >>> - if (!gate) { >>> - adev->pm.dpm.vce_active = true; >>> - /* XXX select vce level based on ring/task */ >>> - adev->pm.dpm.vce_level = >> AMD_VCE_LEVEL_AC_ALL; >>> - } else { >>> - adev->pm.dpm.vce_active = false; >>> - } >>> - >>> - amdgpu_legacy_dpm_compute_clocks(handle); >>> - break; >>> - default: >>> - break; >>> - } >>> - return 0; >>> -} >>> - >>> static int si_set_sw_state(struct amdgpu_device *adev) >>> { >>> return (amdgpu_si_send_msg_to_smc(adev, >> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
>>> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs >> = { >>> .print_power_state = &si_dpm_print_power_state, >>> .debugfs_print_current_performance_level = >> &si_dpm_debugfs_print_current_performance_level, >>> .force_performance_level = &si_dpm_force_performance_level, >>> - .set_powergating_by_smu = &si_set_powergating_by_smu, >>> .vblank_too_short = &si_dpm_vblank_too_short, >>> .set_fan_control_mode = &si_dpm_set_fan_control_mode, >>> .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff -- >> git >>> a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>> b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>> index dbed72c1e0c6..1eb4e613b27a 100644 >>> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void >> *handle) >>> { >>> struct pp_hwmgr *hwmgr = handle; >>> struct amdgpu_device *adev = hwmgr->adev; >>> - int i = 0; >>> - >>> - if (adev->mode_info.num_crtc) >>> - amdgpu_display_bandwidth_update(adev); >>> - >>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>> - struct amdgpu_ring *ring = adev->rings[i]; >>> - if (ring && ring->sched.ready) >>> - amdgpu_fence_wait_empty(ring); >>> - } >>> >>> if (!amdgpu_device_has_dc_support(adev)) { >>> amdgpu_dpm_get_active_displays(adev); >>> ^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-22 8:36 ` Lazar, Lijo @ 2022-04-22 8:53 ` Quan, Evan 2022-04-22 9:25 ` Lazar, Lijo 0 siblings, 1 reply; 9+ messages in thread From: Quan, Evan @ 2022-04-22 8:53 UTC (permalink / raw) To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander, pmenzel, arthur.marsh [AMD Official Use Only] > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@amd.com> > Sent: Friday, April 22, 2022 4:36 PM > To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI > > > > On 4/22/2022 1:57 PM, Quan, Evan wrote: > > [AMD Official Use Only] > > > > > > > >> -----Original Message----- > >> From: Lazar, Lijo <Lijo.Lazar@amd.com> > >> Sent: Monday, April 11, 2022 6:06 PM > >> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > >> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > >> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > >> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed > >> on SI > >> > >> > >> > >> On 4/11/2022 2:24 PM, Evan Quan wrote: > >>> The adev->pm.mutx is already held at the beginning of > >>> > >> > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ > >> enable_vce. > >>> But on their calling path, amdgpu_display_bandwidth_update will be > >>> called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They > >>> will then try to acquire the same adev->pm.mutex and deadlock will > occur. > >>> > >> > >> What about using locked versions of get_sclk/mclk and leave the rest > >> as they are? > > [Quan, Evan] Yeah, that(adding two new APIs for locked versions of > get_sclk/mclk) should also work. But considering there cannot be other ASICs > who need them, they are kind of redundant. > > Meanwhile my version is just to get everything reverted back to its original. > So, it should be quite safe. How do you think? > > > > As far as I see - > > 1) It moves display watermark updates outside of locking > 2) It adds bandwidth update under common compute_clocks paths. [Quan, Evan] I suppose what you mentioned is about "amdgpu_display_bandwidth_update". No, I just double confirmed(the old code in our amd-staging-dkms-5.9/5.10 branch) and it did come with no lock protection. For those legacy ASICs, the watermark setting was performed by programming registers directly(handled at display side). That might explain why it did not have lock protection as recent ASICs. BR Evan > > I guess it is not the same as pre-mutex change. > > Thanks, > Lijo > > > BR > > Evan > >> > >> Thanks, > >> Lijo > >> > >>> By placing amdgpu_display_bandwidth_update outside of adev- > pm.mutex > >>> protection(considering logically they do not need such protection) > >>> and restructuring the call flow accordingly, we can eliminate the > >>> deadlock issue. This comes with no real logics change. > >>> > >>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in > >>> amdgpu_dpm.c") > >>> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > >>> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > >>> Link: https: > >>> //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee- > 32c4cf7d8f9c@molgen.mpg. 
> >>> de/ > >>> BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 > >>> Signed-off-by: Evan Quan <evan.quan@amd.com> > >>> -- > >>> v1->v2: > >>> - rich the commit messages(Paul) > >>> --- > >>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 > >> +++++++++++++++++++ > >>> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > >>> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- > >>> .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > >>> 4 files changed, 39 insertions(+), 55 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>> index 5504d81c77b7..72e7b5d40af6 100644 > >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct > >> amdgpu_device *adev, enum amd_pp_sensors senso > >>> void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > >>> { > >>> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > >>> + int i; > >>> > >>> if (!adev->pm.dpm_enabled) > >>> return; > >>> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct > >> amdgpu_device *adev) > >>> if (!pp_funcs->pm_compute_clocks) > >>> return; > >>> > >>> + if (adev->mode_info.num_crtc) > >>> + amdgpu_display_bandwidth_update(adev); > >>> + > >>> + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>> + struct amdgpu_ring *ring = adev->rings[i]; > >>> + if (ring && ring->sched.ready) > >>> + amdgpu_fence_wait_empty(ring); > >>> + } > >>> + > >>> mutex_lock(&adev->pm.mutex); > >>> pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); > >>> mutex_unlock(&adev->pm.mutex); > >>> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct > >> amdgpu_device *adev, bool enable) > >>> { > >>> int ret = 0; > >>> > >>> + if (adev->family == AMDGPU_FAMILY_SI) { > >>> + mutex_lock(&adev->pm.mutex); > >>> + if (enable) { > >>> + adev->pm.dpm.uvd_active = true; > >>> + adev->pm.dpm.state = > >> POWER_STATE_TYPE_INTERNAL_UVD; > >>> + } else { > >>> + adev->pm.dpm.uvd_active = false; > >>> + } > >>> + mutex_unlock(&adev->pm.mutex); > >>> + > >>> + amdgpu_dpm_compute_clocks(adev); > >>> + return; > >>> + } > >>> + > >>> ret = amdgpu_dpm_set_powergating_by_smu(adev, > >> AMD_IP_BLOCK_TYPE_UVD, !enable); > >>> if (ret) > >>> DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 > >> +477,21 @@ > >>> void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool > enable) > >>> { > >>> int ret = 0; > >>> > >>> + if (adev->family == AMDGPU_FAMILY_SI) { > >>> + mutex_lock(&adev->pm.mutex); > >>> + if (enable) { > >>> + adev->pm.dpm.vce_active = true; > >>> + /* XXX select vce level based on ring/task */ > >>> + adev->pm.dpm.vce_level = > >> AMD_VCE_LEVEL_AC_ALL; > >>> + } else { > >>> + adev->pm.dpm.vce_active = false; > >>> + } > >>> + mutex_unlock(&adev->pm.mutex); > >>> + > >>> + amdgpu_dpm_compute_clocks(adev); > >>> + return; > >>> + } > >>> + > >>> ret = amdgpu_dpm_set_powergating_by_smu(adev, > >> AMD_IP_BLOCK_TYPE_VCE, !enable); > >>> if (ret) > >>> DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", diff --git > >>> a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>> b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>> index 9613c6181c17..d3fe149d8476 100644 > >>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>> @@ -1028,16 +1028,6 @@ static int > >> amdgpu_dpm_change_power_state_locked(struct amdgpu_device > *adev) > >>> void amdgpu_legacy_dpm_compute_clocks(void *handle) > >>> { > >>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >>> - int i = 0; > >>> - > >>> - if (adev->mode_info.num_crtc) > >>> - amdgpu_display_bandwidth_update(adev); > >>> - > >>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>> - struct amdgpu_ring *ring = adev->rings[i]; > >>> - if (ring && ring->sched.ready) > >>> - amdgpu_fence_wait_empty(ring); > >>> - } > >>> > >>> amdgpu_dpm_get_active_displays(adev); > >>> > >>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>> b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>> index caae54487f9c..633dab14f51c 100644 > >>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct > >> amdgpu_device *adev) > >>> } > >>> #endif > >>> > >>> -static int si_set_powergating_by_smu(void *handle, > >>> - uint32_t block_type, > >>> - bool gate) > >>> -{ > >>> - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > >>> - > >>> - switch (block_type) { > >>> - case AMD_IP_BLOCK_TYPE_UVD: > >>> - if (!gate) { > >>> - adev->pm.dpm.uvd_active = true; > >>> - adev->pm.dpm.state = > >> POWER_STATE_TYPE_INTERNAL_UVD; > >>> - } else { > >>> - adev->pm.dpm.uvd_active = false; > >>> - } > >>> - > >>> - amdgpu_legacy_dpm_compute_clocks(handle); > >>> - break; > >>> - case AMD_IP_BLOCK_TYPE_VCE: > >>> - if (!gate) { > >>> - adev->pm.dpm.vce_active = true; > >>> - /* XXX select vce level based on ring/task */ > >>> - adev->pm.dpm.vce_level = > >> AMD_VCE_LEVEL_AC_ALL; > >>> - } else { > >>> - adev->pm.dpm.vce_active = false; > >>> - } > >>> - > >>> - amdgpu_legacy_dpm_compute_clocks(handle); > >>> - break; > >>> - default: > >>> - break; > >>> - } > >>> - return 0; > >>> -} > >>> - > >>> static int si_set_sw_state(struct amdgpu_device *adev) > >>> { > >>> return (amdgpu_si_send_msg_to_smc(adev, > >> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
> >>> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs > si_dpm_funcs > >> = { > >>> .print_power_state = &si_dpm_print_power_state, > >>> .debugfs_print_current_performance_level = > >> &si_dpm_debugfs_print_current_performance_level, > >>> .force_performance_level = &si_dpm_force_performance_level, > >>> - .set_powergating_by_smu = &si_set_powergating_by_smu, > >>> .vblank_too_short = &si_dpm_vblank_too_short, > >>> .set_fan_control_mode = &si_dpm_set_fan_control_mode, > >>> .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff -- > >> git > >>> a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>> b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>> index dbed72c1e0c6..1eb4e613b27a 100644 > >>> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void > >> *handle) > >>> { > >>> struct pp_hwmgr *hwmgr = handle; > >>> struct amdgpu_device *adev = hwmgr->adev; > >>> - int i = 0; > >>> - > >>> - if (adev->mode_info.num_crtc) > >>> - amdgpu_display_bandwidth_update(adev); > >>> - > >>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>> - struct amdgpu_ring *ring = adev->rings[i]; > >>> - if (ring && ring->sched.ready) > >>> - amdgpu_fence_wait_empty(ring); > >>> - } > >>> > >>> if (!amdgpu_device_has_dc_support(adev)) { > >>> amdgpu_dpm_get_active_displays(adev); > >>> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-22 8:53 ` Quan, Evan @ 2022-04-22 9:25 ` Lazar, Lijo 2022-04-22 9:43 ` Quan, Evan 0 siblings, 1 reply; 9+ messages in thread From: Lazar, Lijo @ 2022-04-22 9:25 UTC (permalink / raw) To: Quan, Evan, amd-gfx; +Cc: Deucher, Alexander, pmenzel, arthur.marsh On 4/22/2022 2:23 PM, Quan, Evan wrote: > [AMD Official Use Only] > > > >> -----Original Message----- >> From: Lazar, Lijo <Lijo.Lazar@amd.com> >> Sent: Friday, April 22, 2022 4:36 PM >> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org >> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; >> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net >> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI >> >> >> >> On 4/22/2022 1:57 PM, Quan, Evan wrote: >>> [AMD Official Use Only] >>> >>> >>> >>>> -----Original Message----- >>>> From: Lazar, Lijo <Lijo.Lazar@amd.com> >>>> Sent: Monday, April 11, 2022 6:06 PM >>>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org >>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; >>>> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net >>>> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed >>>> on SI >>>> >>>> >>>> >>>> On 4/11/2022 2:24 PM, Evan Quan wrote: >>>>> The adev->pm.mutx is already held at the beginning of >>>>> >>>> >> amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ >>>> enable_vce. >>>>> But on their calling path, amdgpu_display_bandwidth_update will be >>>>> called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They >>>>> will then try to acquire the same adev->pm.mutex and deadlock will >> occur. >>>>> >>>> >>>> What about using locked versions of get_sclk/mclk and leave the rest >>>> as they are? >>> [Quan, Evan] Yeah, that(adding two new APIs for locked versions of >> get_sclk/mclk) should also work. But considering there cannot be other ASICs >> who need them, they are kind of redundant. >>> Meanwhile my version is just to get everything reverted back to its original. >> So, it should be quite safe. How do you think? >>> >> >> As far as I see - >> >> 1) It moves display watermark updates outside of locking >> 2) It adds bandwidth update under common compute_clocks paths. > [Quan, Evan] I suppose what you mentioned is about "amdgpu_display_bandwidth_update". > No, I just double confirmed(the old code in our amd-staging-dkms-5.9/5.10 branch) and it did come with no lock protection. > For those legacy ASICs, the watermark setting was performed by programming registers directly(handled at display side). > That might explain why it did not have lock protection as recent ASICs. > Yes, I was referring to amdgpu_display_bandwidth_update. You are right, I see it getting called outside of pm mutex at least. Also it is called commonly in amdgpu_pm_compute_clocks. Let's proceed it this way for now. Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Thanks, Lijo > BR > Evan >> >> I guess it is not the same as pre-mutex change. >> >> Thanks, >> Lijo >> >>> BR >>> Evan >>>> >>>> Thanks, >>>> Lijo >>>> >>>>> By placing amdgpu_display_bandwidth_update outside of adev- >> pm.mutex >>>>> protection(considering logically they do not need such protection) >>>>> and restructuring the call flow accordingly, we can eliminate the >>>>> deadlock issue. This comes with no real logics change. 
>>>>> >>>>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in >>>>> amdgpu_dpm.c") >>>>> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> >>>>> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> >>>>> Link: https: >>>>> //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee- >> 32c4cf7d8f9c@molgen.mpg. >>>>> de/ >>>>> BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 >>>>> Signed-off-by: Evan Quan <evan.quan@amd.com> >>>>> -- >>>>> v1->v2: >>>>> - rich the commit messages(Paul) >>>>> --- >>>>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 >>>> +++++++++++++++++++ >>>>> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- >>>>> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- >>>>> .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- >>>>> 4 files changed, 39 insertions(+), 55 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>>>> index 5504d81c77b7..72e7b5d40af6 100644 >>>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c >>>>> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct >>>> amdgpu_device *adev, enum amd_pp_sensors senso >>>>> void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) >>>>> { >>>>> const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; >>>>> + int i; >>>>> >>>>> if (!adev->pm.dpm_enabled) >>>>> return; >>>>> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct >>>> amdgpu_device *adev) >>>>> if (!pp_funcs->pm_compute_clocks) >>>>> return; >>>>> >>>>> + if (adev->mode_info.num_crtc) >>>>> + amdgpu_display_bandwidth_update(adev); >>>>> + >>>>> + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>>>> + struct amdgpu_ring *ring = adev->rings[i]; >>>>> + if (ring && ring->sched.ready) >>>>> + amdgpu_fence_wait_empty(ring); >>>>> + } >>>>> + >>>>> mutex_lock(&adev->pm.mutex); >>>>> pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); >>>>> mutex_unlock(&adev->pm.mutex); >>>>> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct >>>> amdgpu_device *adev, bool enable) >>>>> { >>>>> int ret = 0; >>>>> >>>>> + if (adev->family == AMDGPU_FAMILY_SI) { >>>>> + mutex_lock(&adev->pm.mutex); >>>>> + if (enable) { >>>>> + adev->pm.dpm.uvd_active = true; >>>>> + adev->pm.dpm.state = >>>> POWER_STATE_TYPE_INTERNAL_UVD; >>>>> + } else { >>>>> + adev->pm.dpm.uvd_active = false; >>>>> + } >>>>> + mutex_unlock(&adev->pm.mutex); >>>>> + >>>>> + amdgpu_dpm_compute_clocks(adev); >>>>> + return; >>>>> + } >>>>> + >>>>> ret = amdgpu_dpm_set_powergating_by_smu(adev, >>>> AMD_IP_BLOCK_TYPE_UVD, !enable); >>>>> if (ret) >>>>> DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 >>>> +477,21 @@ >>>>> void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool >> enable) >>>>> { >>>>> int ret = 0; >>>>> >>>>> + if (adev->family == AMDGPU_FAMILY_SI) { >>>>> + mutex_lock(&adev->pm.mutex); >>>>> + if (enable) { >>>>> + adev->pm.dpm.vce_active = true; >>>>> + /* XXX select vce level based on ring/task */ >>>>> + adev->pm.dpm.vce_level = >>>> AMD_VCE_LEVEL_AC_ALL; >>>>> + } else { >>>>> + adev->pm.dpm.vce_active = false; >>>>> + } >>>>> + mutex_unlock(&adev->pm.mutex); >>>>> + >>>>> + amdgpu_dpm_compute_clocks(adev); >>>>> + return; >>>>> + } >>>>> + >>>>> ret = amdgpu_dpm_set_powergating_by_smu(adev, >>>> AMD_IP_BLOCK_TYPE_VCE, !enable); >>>>> if (ret) >>>>> DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", diff --git >>>>> a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>>>> index 9613c6181c17..d3fe149d8476 100644 >>>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c >>>>> @@ -1028,16 +1028,6 @@ static int >>>> amdgpu_dpm_change_power_state_locked(struct amdgpu_device >> *adev) >>>>> void amdgpu_legacy_dpm_compute_clocks(void *handle) >>>>> { >>>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>>>> - int i = 0; >>>>> - >>>>> - if (adev->mode_info.num_crtc) >>>>> - amdgpu_display_bandwidth_update(adev); >>>>> - >>>>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>>>> - struct amdgpu_ring *ring = adev->rings[i]; >>>>> - if (ring && ring->sched.ready) >>>>> - amdgpu_fence_wait_empty(ring); >>>>> - } >>>>> >>>>> amdgpu_dpm_get_active_displays(adev); >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>>>> index caae54487f9c..633dab14f51c 100644 >>>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c >>>>> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct >>>> amdgpu_device *adev) >>>>> } >>>>> #endif >>>>> >>>>> -static int si_set_powergating_by_smu(void *handle, >>>>> - uint32_t block_type, >>>>> - bool gate) >>>>> -{ >>>>> - struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>>>> - >>>>> - switch (block_type) { >>>>> - case AMD_IP_BLOCK_TYPE_UVD: >>>>> - if (!gate) { >>>>> - adev->pm.dpm.uvd_active = true; >>>>> - adev->pm.dpm.state = >>>> POWER_STATE_TYPE_INTERNAL_UVD; >>>>> - } else { >>>>> - adev->pm.dpm.uvd_active = false; >>>>> - } >>>>> - >>>>> - amdgpu_legacy_dpm_compute_clocks(handle); >>>>> - break; >>>>> - case AMD_IP_BLOCK_TYPE_VCE: >>>>> - if (!gate) { >>>>> - adev->pm.dpm.vce_active = true; >>>>> - /* XXX select vce level based on ring/task */ >>>>> - adev->pm.dpm.vce_level = >>>> AMD_VCE_LEVEL_AC_ALL; >>>>> - } else { >>>>> - adev->pm.dpm.vce_active = false; >>>>> - } >>>>> - >>>>> - amdgpu_legacy_dpm_compute_clocks(handle); >>>>> - break; >>>>> - default: >>>>> - break; >>>>> - } >>>>> - return 0; >>>>> -} >>>>> - >>>>> static int si_set_sw_state(struct amdgpu_device *adev) >>>>> { >>>>> return (amdgpu_si_send_msg_to_smc(adev, >>>> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
>>>>> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs >> si_dpm_funcs >>>> = { >>>>> .print_power_state = &si_dpm_print_power_state, >>>>> .debugfs_print_current_performance_level = >>>> &si_dpm_debugfs_print_current_performance_level, >>>>> .force_performance_level = &si_dpm_force_performance_level, >>>>> - .set_powergating_by_smu = &si_set_powergating_by_smu, >>>>> .vblank_too_short = &si_dpm_vblank_too_short, >>>>> .set_fan_control_mode = &si_dpm_set_fan_control_mode, >>>>> .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff -- >>>> git >>>>> a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>>>> b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>>>> index dbed72c1e0c6..1eb4e613b27a 100644 >>>>> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>>>> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c >>>>> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void >>>> *handle) >>>>> { >>>>> struct pp_hwmgr *hwmgr = handle; >>>>> struct amdgpu_device *adev = hwmgr->adev; >>>>> - int i = 0; >>>>> - >>>>> - if (adev->mode_info.num_crtc) >>>>> - amdgpu_display_bandwidth_update(adev); >>>>> - >>>>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { >>>>> - struct amdgpu_ring *ring = adev->rings[i]; >>>>> - if (ring && ring->sched.ready) >>>>> - amdgpu_fence_wait_empty(ring); >>>>> - } >>>>> >>>>> if (!amdgpu_device_has_dc_support(adev)) { >>>>> amdgpu_dpm_get_active_displays(adev); >>>>> ^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-22 9:25 ` Lazar, Lijo @ 2022-04-22 9:43 ` Quan, Evan 0 siblings, 0 replies; 9+ messages in thread From: Quan, Evan @ 2022-04-22 9:43 UTC (permalink / raw) To: Lazar, Lijo, amd-gfx, Deucher, Alexander; +Cc: pmenzel, arthur.marsh [AMD Official Use Only] Thanks Lijo. @Deucher, Alexander I just landed the change on our drm-next branch. BR Evan > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@amd.com> > Sent: Friday, April 22, 2022 5:25 PM > To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI > > > > On 4/22/2022 2:23 PM, Quan, Evan wrote: > > [AMD Official Use Only] > > > > > > > >> -----Original Message----- > >> From: Lazar, Lijo <Lijo.Lazar@amd.com> > >> Sent: Friday, April 22, 2022 4:36 PM > >> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > >> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > >> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > >> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed > >> on SI > >> > >> > >> > >> On 4/22/2022 1:57 PM, Quan, Evan wrote: > >>> [AMD Official Use Only] > >>> > >>> > >>> > >>>> -----Original Message----- > >>>> From: Lazar, Lijo <Lijo.Lazar@amd.com> > >>>> Sent: Monday, April 11, 2022 6:06 PM > >>>> To: Quan, Evan <Evan.Quan@amd.com>; amd- > gfx@lists.freedesktop.org > >>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > >>>> pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > >>>> Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed > >>>> on SI > >>>> > >>>> > >>>> > >>>> On 4/11/2022 2:24 PM, Evan Quan wrote: > >>>>> The adev->pm.mutx is already held at the beginning of > >>>>> > >>>> > >> > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ > >>>> enable_vce. > >>>>> But on their calling path, amdgpu_display_bandwidth_update will be > >>>>> called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They > >>>>> will then try to acquire the same adev->pm.mutex and deadlock will > >> occur. > >>>>> > >>>> > >>>> What about using locked versions of get_sclk/mclk and leave the > >>>> rest as they are? > >>> [Quan, Evan] Yeah, that(adding two new APIs for locked versions of > >> get_sclk/mclk) should also work. But considering there cannot be > >> other ASICs who need them, they are kind of redundant. > >>> Meanwhile my version is just to get everything reverted back to its > original. > >> So, it should be quite safe. How do you think? > >>> > >> > >> As far as I see - > >> > >> 1) It moves display watermark updates outside of locking > >> 2) It adds bandwidth update under common compute_clocks paths. > > [Quan, Evan] I suppose what you mentioned is about > "amdgpu_display_bandwidth_update". > > No, I just double confirmed(the old code in our amd-staging-dkms-5.9/5.10 > branch) and it did come with no lock protection. > > For those legacy ASICs, the watermark setting was performed by > programming registers directly(handled at display side). > > That might explain why it did not have lock protection as recent ASICs. > > > > Yes, I was referring to amdgpu_display_bandwidth_update. You are right, I > see it getting called outside of pm mutex at least. Also it is called commonly in > amdgpu_pm_compute_clocks. > > Let's proceed it this way for now. 
> > Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> > > Thanks, > Lijo > > > BR > > Evan > >> > >> I guess it is not the same as pre-mutex change. > >> > >> Thanks, > >> Lijo > >> > >>> BR > >>> Evan > >>>> > >>>> Thanks, > >>>> Lijo > >>>> > >>>>> By placing amdgpu_display_bandwidth_update outside of adev- > >> pm.mutex > >>>>> protection(considering logically they do not need such protection) > >>>>> and restructuring the call flow accordingly, we can eliminate the > >>>>> deadlock issue. This comes with no real logics change. > >>>>> > >>>>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in > >>>>> amdgpu_dpm.c") > >>>>> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > >>>>> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > >>>>> Link: https: > >>>>> //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee- > >> 32c4cf7d8f9c@molgen.mpg. > >>>>> de/ > >>>>> BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 > >>>>> Signed-off-by: Evan Quan <evan.quan@amd.com> > >>>>> -- > >>>>> v1->v2: > >>>>> - rich the commit messages(Paul) > >>>>> --- > >>>>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 > >>>> +++++++++++++++++++ > >>>>> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > >>>>> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 --------------- > -- > >>>>> .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > >>>>> 4 files changed, 39 insertions(+), 55 deletions(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>>>> index 5504d81c77b7..72e7b5d40af6 100644 > >>>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > >>>>> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct > >>>> amdgpu_device *adev, enum amd_pp_sensors senso > >>>>> void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > >>>>> { > >>>>> const struct amd_pm_funcs *pp_funcs = > >>>>> adev->powerplay.pp_funcs; > >>>>> + int i; > >>>>> > >>>>> if (!adev->pm.dpm_enabled) > >>>>> return; > >>>>> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct > >>>> amdgpu_device *adev) > >>>>> if (!pp_funcs->pm_compute_clocks) > >>>>> return; > >>>>> > >>>>> + if (adev->mode_info.num_crtc) > >>>>> + amdgpu_display_bandwidth_update(adev); > >>>>> + > >>>>> + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>>>> + struct amdgpu_ring *ring = adev->rings[i]; > >>>>> + if (ring && ring->sched.ready) > >>>>> + amdgpu_fence_wait_empty(ring); > >>>>> + } > >>>>> + > >>>>> mutex_lock(&adev->pm.mutex); > >>>>> pp_funcs->pm_compute_clocks(adev- > >powerplay.pp_handle); > >>>>> mutex_unlock(&adev->pm.mutex); @@ -443,6 +453,20 @@ > void > >>>>> amdgpu_dpm_enable_uvd(struct > >>>> amdgpu_device *adev, bool enable) > >>>>> { > >>>>> int ret = 0; > >>>>> > >>>>> + if (adev->family == AMDGPU_FAMILY_SI) { > >>>>> + mutex_lock(&adev->pm.mutex); > >>>>> + if (enable) { > >>>>> + adev->pm.dpm.uvd_active = true; > >>>>> + adev->pm.dpm.state = > >>>> POWER_STATE_TYPE_INTERNAL_UVD; > >>>>> + } else { > >>>>> + adev->pm.dpm.uvd_active = false; > >>>>> + } > >>>>> + mutex_unlock(&adev->pm.mutex); > >>>>> + > >>>>> + amdgpu_dpm_compute_clocks(adev); > >>>>> + return; > >>>>> + } > >>>>> + > >>>>> ret = amdgpu_dpm_set_powergating_by_smu(adev, > >>>> AMD_IP_BLOCK_TYPE_UVD, !enable); > >>>>> if (ret) > >>>>> DRM_ERROR("Dpm %s uvd failed, ret = %d. 
\n", @@ > -453,6 > >>>> +477,21 @@ > >>>>> void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool > >> enable) > >>>>> { > >>>>> int ret = 0; > >>>>> > >>>>> + if (adev->family == AMDGPU_FAMILY_SI) { > >>>>> + mutex_lock(&adev->pm.mutex); > >>>>> + if (enable) { > >>>>> + adev->pm.dpm.vce_active = true; > >>>>> + /* XXX select vce level based on ring/task */ > >>>>> + adev->pm.dpm.vce_level = > >>>> AMD_VCE_LEVEL_AC_ALL; > >>>>> + } else { > >>>>> + adev->pm.dpm.vce_active = false; > >>>>> + } > >>>>> + mutex_unlock(&adev->pm.mutex); > >>>>> + > >>>>> + amdgpu_dpm_compute_clocks(adev); > >>>>> + return; > >>>>> + } > >>>>> + > >>>>> ret = amdgpu_dpm_set_powergating_by_smu(adev, > >>>> AMD_IP_BLOCK_TYPE_VCE, !enable); > >>>>> if (ret) > >>>>> DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff -- > git > >>>>> a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>>>> index 9613c6181c17..d3fe149d8476 100644 > >>>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > >>>>> @@ -1028,16 +1028,6 @@ static int > >>>> amdgpu_dpm_change_power_state_locked(struct amdgpu_device > >> *adev) > >>>>> void amdgpu_legacy_dpm_compute_clocks(void *handle) > >>>>> { > >>>>> struct amdgpu_device *adev = (struct amdgpu_device > *)handle; > >>>>> - int i = 0; > >>>>> - > >>>>> - if (adev->mode_info.num_crtc) > >>>>> - amdgpu_display_bandwidth_update(adev); > >>>>> - > >>>>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>>>> - struct amdgpu_ring *ring = adev->rings[i]; > >>>>> - if (ring && ring->sched.ready) > >>>>> - amdgpu_fence_wait_empty(ring); > >>>>> - } > >>>>> > >>>>> amdgpu_dpm_get_active_displays(adev); > >>>>> > >>>>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>>>> index caae54487f9c..633dab14f51c 100644 > >>>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > >>>>> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct > >>>> amdgpu_device *adev) > >>>>> } > >>>>> #endif > >>>>> > >>>>> -static int si_set_powergating_by_smu(void *handle, > >>>>> - uint32_t block_type, > >>>>> - bool gate) > >>>>> -{ > >>>>> - struct amdgpu_device *adev = (struct amdgpu_device > *)handle; > >>>>> - > >>>>> - switch (block_type) { > >>>>> - case AMD_IP_BLOCK_TYPE_UVD: > >>>>> - if (!gate) { > >>>>> - adev->pm.dpm.uvd_active = true; > >>>>> - adev->pm.dpm.state = > >>>> POWER_STATE_TYPE_INTERNAL_UVD; > >>>>> - } else { > >>>>> - adev->pm.dpm.uvd_active = false; > >>>>> - } > >>>>> - > >>>>> - amdgpu_legacy_dpm_compute_clocks(handle); > >>>>> - break; > >>>>> - case AMD_IP_BLOCK_TYPE_VCE: > >>>>> - if (!gate) { > >>>>> - adev->pm.dpm.vce_active = true; > >>>>> - /* XXX select vce level based on ring/task */ > >>>>> - adev->pm.dpm.vce_level = > >>>> AMD_VCE_LEVEL_AC_ALL; > >>>>> - } else { > >>>>> - adev->pm.dpm.vce_active = false; > >>>>> - } > >>>>> - > >>>>> - amdgpu_legacy_dpm_compute_clocks(handle); > >>>>> - break; > >>>>> - default: > >>>>> - break; > >>>>> - } > >>>>> - return 0; > >>>>> -} > >>>>> - > >>>>> static int si_set_sw_state(struct amdgpu_device *adev) > >>>>> { > >>>>> return (amdgpu_si_send_msg_to_smc(adev, > >>>> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
> >>>>> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs > >> si_dpm_funcs > >>>> = { > >>>>> .print_power_state = &si_dpm_print_power_state, > >>>>> .debugfs_print_current_performance_level = > >>>> &si_dpm_debugfs_print_current_performance_level, > >>>>> .force_performance_level = > &si_dpm_force_performance_level, > >>>>> - .set_powergating_by_smu = &si_set_powergating_by_smu, > >>>>> .vblank_too_short = &si_dpm_vblank_too_short, > >>>>> .set_fan_control_mode = &si_dpm_set_fan_control_mode, > >>>>> .get_fan_control_mode = &si_dpm_get_fan_control_mode, > diff -- > >>>> git > >>>>> a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>>>> b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>>>> index dbed72c1e0c6..1eb4e613b27a 100644 > >>>>> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>>>> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > >>>>> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void > >>>> *handle) > >>>>> { > >>>>> struct pp_hwmgr *hwmgr = handle; > >>>>> struct amdgpu_device *adev = hwmgr->adev; > >>>>> - int i = 0; > >>>>> - > >>>>> - if (adev->mode_info.num_crtc) > >>>>> - amdgpu_display_bandwidth_update(adev); > >>>>> - > >>>>> - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > >>>>> - struct amdgpu_ring *ring = adev->rings[i]; > >>>>> - if (ring && ring->sched.ready) > >>>>> - amdgpu_fence_wait_empty(ring); > >>>>> - } > >>>>> > >>>>> if (!amdgpu_device_has_dc_support(adev)) { > >>>>> amdgpu_dpm_get_active_displays(adev); > >>>>> ^ permalink raw reply [flat|nested] 9+ messages in thread
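The alternative Lijo raises above (locked versions of get_sclk/mclk) is the common split-lock idiom: a *_locked helper that assumes the caller already holds the mutex, plus a public wrapper that takes the lock itself. Below is a minimal userspace sketch of that idiom using pthreads; the names pm_get_sclk/pm_get_sclk_locked and struct pm_state are hypothetical stand-ins, not the amdgpu API, and the bodies are placeholders.

```c
#include <pthread.h>

/* Hypothetical PM state guarded by one non-recursive mutex,
 * standing in for adev->pm.mutex.  Names are illustrative only. */
struct pm_state {
	pthread_mutex_t mutex;
	unsigned int sclk;	/* current engine clock, in kHz */
};

/* _locked variant: caller must already hold st->mutex. */
static unsigned int pm_get_sclk_locked(struct pm_state *st)
{
	return st->sclk;
}

/* Public variant: takes the mutex itself, for callers that do not hold it. */
static unsigned int pm_get_sclk(struct pm_state *st)
{
	unsigned int sclk;

	pthread_mutex_lock(&st->mutex);
	sclk = pm_get_sclk_locked(st);
	pthread_mutex_unlock(&st->mutex);

	return sclk;
}

/* A compute_clocks-style path that already holds the mutex calls the
 * _locked variant and never re-acquires the same lock. */
static void pm_compute_clocks(struct pm_state *st)
{
	unsigned int sclk;

	pthread_mutex_lock(&st->mutex);
	sclk = pm_get_sclk_locked(st);	/* safe: lock already held */
	(void)sclk;			/* ... recompute clocks here ... */
	pthread_mutex_unlock(&st->mutex);
}

int main(void)
{
	struct pm_state st = { .mutex = PTHREAD_MUTEX_INITIALIZER, .sclk = 300000 };

	pm_compute_clocks(&st);
	return pm_get_sclk(&st) == 300000 ? 0 : 1;
}
```

Either shape would avoid the re-acquisition; the merged patch instead moves the display update out in front of the lock altogether, which keeps the existing amdgpu_dpm_get_sclk/mclk signatures unchanged and, as Evan notes, simply restores the pre-mutex-rework call order.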
* Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-11 8:54 [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan 2022-04-11 10:06 ` Lazar, Lijo @ 2022-04-21 8:29 ` Thorsten Leemhuis 2022-04-22 8:16 ` Quan, Evan 1 sibling, 1 reply; 9+ messages in thread From: Thorsten Leemhuis @ 2022-04-21 8:29 UTC (permalink / raw) To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher, pmenzel, arthur.marsh On 11.04.22 10:54, Evan Quan wrote: > The adev->pm.mutx is already held at the beginning of > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_enable_vce. > But on their calling path, amdgpu_display_bandwidth_update will be > called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They > will then try to acquire the same adev->pm.mutex and deadlock will > occur. > > By placing amdgpu_display_bandwidth_update outside of adev->pm.mutex > protection(considering logically they do not need such protection) and > restructuring the call flow accordingly, we can eliminate the deadlock > issue. This comes with no real logics change. > > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c") > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > Link: https: //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de/ > BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 Side note: two spaces sneaked in there. But that's not why I write this mail. This patch is fixing a regression in 5.18-rc, but it looks like things are stuck for more than a week now. What's up there? Or was progress made somewhere and I just couldn't find it? Was the review comment from Lijo addressed? Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) P.S.: As the Linux kernel's regression tracker I'm getting a lot of reports on my table. I can only look briefly into most of them and lack knowledge about most of the areas they concern. I thus unfortunately will sometimes get things wrong or miss something important. I hope that's not the case here; if you think it is, don't hesitate to tell me in a public reply, it's in everyone's interest to set the public record straight. 
#regzbot ignore-activity #regzbot ^backmonitor: https://lore.kernel.org/all/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de/ > Signed-off-by: Evan Quan <evan.quan@amd.com> > -- > v1->v2: > - rich the commit messages(Paul) > --- > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 +++++++++++++++++++ > .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- > .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > 4 files changed, 39 insertions(+), 55 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > index 5504d81c77b7..72e7b5d40af6 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso > void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > { > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > + int i; > > if (!adev->pm.dpm_enabled) > return; > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > if (!pp_funcs->pm_compute_clocks) > return; > > + if (adev->mode_info.num_crtc) > + amdgpu_display_bandwidth_update(adev); > + > + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > + struct amdgpu_ring *ring = adev->rings[i]; > + if (ring && ring->sched.ready) > + amdgpu_fence_wait_empty(ring); > + } > + > mutex_lock(&adev->pm.mutex); > pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); > mutex_unlock(&adev->pm.mutex); > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) > { > int ret = 0; > > + if (adev->family == AMDGPU_FAMILY_SI) { > + mutex_lock(&adev->pm.mutex); > + if (enable) { > + adev->pm.dpm.uvd_active = true; > + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; > + } else { > + adev->pm.dpm.uvd_active = false; > + } > + mutex_unlock(&adev->pm.mutex); > + > + amdgpu_dpm_compute_clocks(adev); > + return; > + } > + > ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); > if (ret) > DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", > @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) > { > int ret = 0; > > + if (adev->family == AMDGPU_FAMILY_SI) { > + mutex_lock(&adev->pm.mutex); > + if (enable) { > + adev->pm.dpm.vce_active = true; > + /* XXX select vce level based on ring/task */ > + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; > + } else { > + adev->pm.dpm.vce_active = false; > + } > + mutex_unlock(&adev->pm.mutex); > + > + amdgpu_dpm_compute_clocks(adev); > + return; > + } > + > ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); > if (ret) > DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > index 9613c6181c17..d3fe149d8476 100644 > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) > void amdgpu_legacy_dpm_compute_clocks(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - int i = 0; > - > - if (adev->mode_info.num_crtc) > - amdgpu_display_bandwidth_update(adev); > - > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > - struct amdgpu_ring *ring = adev->rings[i]; > - if (ring && ring->sched.ready) > - amdgpu_fence_wait_empty(ring); > - } > > amdgpu_dpm_get_active_displays(adev); > > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > index caae54487f9c..633dab14f51c 100644 > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev) > } > #endif > > -static int si_set_powergating_by_smu(void *handle, > - uint32_t block_type, > - bool gate) > -{ > - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - > - switch (block_type) { > - case AMD_IP_BLOCK_TYPE_UVD: > - if (!gate) { > - adev->pm.dpm.uvd_active = true; > - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; > - } else { > - adev->pm.dpm.uvd_active = false; > - } > - > - amdgpu_legacy_dpm_compute_clocks(handle); > - break; > - case AMD_IP_BLOCK_TYPE_VCE: > - if (!gate) { > - adev->pm.dpm.vce_active = true; > - /* XXX select vce level based on ring/task */ > - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; > - } else { > - adev->pm.dpm.vce_active = false; > - } > - > - amdgpu_legacy_dpm_compute_clocks(handle); > - break; > - default: > - break; > - } > - return 0; > -} > - > static int si_set_sw_state(struct amdgpu_device *adev) > { > return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = { > .print_power_state = &si_dpm_print_power_state, > .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level, > .force_performance_level = &si_dpm_force_performance_level, > - .set_powergating_by_smu = &si_set_powergating_by_smu, > .vblank_too_short = &si_dpm_vblank_too_short, > .set_fan_control_mode = &si_dpm_set_fan_control_mode, > .get_fan_control_mode = &si_dpm_get_fan_control_mode, > diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > index dbed72c1e0c6..1eb4e613b27a 100644 > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle) > { > struct pp_hwmgr *hwmgr = handle; > struct amdgpu_device *adev = hwmgr->adev; > - int i = 0; > - > - if (adev->mode_info.num_crtc) > - amdgpu_display_bandwidth_update(adev); > - > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > - struct amdgpu_ring *ring = adev->rings[i]; > - if (ring && ring->sched.ready) > - amdgpu_fence_wait_empty(ring); > - } > > if (!amdgpu_device_has_dc_support(adev)) { > amdgpu_dpm_get_active_displays(adev); ^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI 2022-04-21 8:29 ` Thorsten Leemhuis @ 2022-04-22 8:16 ` Quan, Evan 0 siblings, 0 replies; 9+ messages in thread From: Quan, Evan @ 2022-04-22 8:16 UTC (permalink / raw) To: Thorsten Leemhuis, amd-gfx; +Cc: Deucher, Alexander, pmenzel, arthur.marsh [AMD Official Use Only] > -----Original Message----- > From: Thorsten Leemhuis <regressions@leemhuis.info> > Sent: Thursday, April 21, 2022 4:29 PM > To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org > Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; > pmenzel@molgen.mpg.de; arthur.marsh@internode.on.net > Subject: Re: [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI > > On 11.04.22 10:54, Evan Quan wrote: > > The adev->pm.mutx is already held at the beginning of > > > amdgpu_dpm_compute_clocks/amdgpu_dpm_enable_uvd/amdgpu_dpm_ > enable_vce. > > But on their calling path, amdgpu_display_bandwidth_update will be > > called and thus its sub functions amdgpu_dpm_get_sclk/mclk. They > > will then try to acquire the same adev->pm.mutex and deadlock will > > occur. > > > > By placing amdgpu_display_bandwidth_update outside of adev- > >pm.mutex > > protection(considering logically they do not need such protection) and > > restructuring the call flow accordingly, we can eliminate the deadlock > > issue. This comes with no real logics change. > > > > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in > amdgpu_dpm.c") > > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > > Reported-by: Arthur Marsh <arthur.marsh@internode.on.net> > > Link: https: //lore.kernel.org/all/9e689fea-6c69-f4b0-8dee- > 32c4cf7d8f9c@molgen.mpg.de/ > > BugLink: https: //gitlab.freedesktop.org/drm/amd/-/issues/1957 > > Side note: two spaces sneaked in there. But that's not why I write this > mail. [Quan, Evan] Thanks! Will fix that. > > This patch is fixing a regression in 5.18-rc, but it looks like things > are stuck for more than a week now. What's up there? Or was progress > made somewhere and I just couldn't find it? Was the review comment from > Lijo addressed? [Quan, Evan] Sorry, was busy with other stuffs. Let me check. BR Evan > > Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) > > P.S.: As the Linux kernel's regression tracker I'm getting a lot of > reports on my table. I can only look briefly into most of them and lack > knowledge about most of the areas they concern. I thus unfortunately > will sometimes get things wrong or miss something important. I hope > that's not the case here; if you think it is, don't hesitate to tell me > in a public reply, it's in everyone's interest to set the public record > straight. > > #regzbot ignore-activity > #regzbot ^backmonitor: > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore. 
> kernel.org%2Fall%2F9e689fea-6c69-f4b0-8dee- > 32c4cf7d8f9c%40molgen.mpg.de%2F&data=05%7C01%7Cevan.quan%4 > 0amd.com%7Cf8885e7025e9445db6e508da23710586%7C3dd8961fe4884e608 > e11a82d994e183d%7C0%7C0%7C637861265579473039%7CUnknown%7CTWF > pbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXV > CI6Mn0%3D%7C3000%7C%7C%7C&sdata=47%2FiLxh3iw9jN%2B0KiUfcO > 79u7kpjaNKkueZE%2BMEDtl8%3D&reserved=0 > > > Signed-off-by: Evan Quan <evan.quan@amd.com> > > -- > > v1->v2: > > - rich the commit messages(Paul) > > --- > > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 39 > +++++++++++++++++++ > > .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 10 ----- > > drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 35 ----------------- > > .../gpu/drm/amd/pm/powerplay/amd_powerplay.c | 10 ----- > > 4 files changed, 39 insertions(+), 55 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > index 5504d81c77b7..72e7b5d40af6 100644 > > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct > amdgpu_device *adev, enum amd_pp_sensors senso > > void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) > > { > > const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > > + int i; > > > > if (!adev->pm.dpm_enabled) > > return; > > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct > amdgpu_device *adev) > > if (!pp_funcs->pm_compute_clocks) > > return; > > > > + if (adev->mode_info.num_crtc) > > + amdgpu_display_bandwidth_update(adev); > > + > > + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > + struct amdgpu_ring *ring = adev->rings[i]; > > + if (ring && ring->sched.ready) > > + amdgpu_fence_wait_empty(ring); > > + } > > + > > mutex_lock(&adev->pm.mutex); > > pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); > > mutex_unlock(&adev->pm.mutex); > > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct > amdgpu_device *adev, bool enable) > > { > > int ret = 0; > > > > + if (adev->family == AMDGPU_FAMILY_SI) { > > + mutex_lock(&adev->pm.mutex); > > + if (enable) { > > + adev->pm.dpm.uvd_active = true; > > + adev->pm.dpm.state = > POWER_STATE_TYPE_INTERNAL_UVD; > > + } else { > > + adev->pm.dpm.uvd_active = false; > > + } > > + mutex_unlock(&adev->pm.mutex); > > + > > + amdgpu_dpm_compute_clocks(adev); > > + return; > > + } > > + > > ret = amdgpu_dpm_set_powergating_by_smu(adev, > AMD_IP_BLOCK_TYPE_UVD, !enable); > > if (ret) > > DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", > > @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct > amdgpu_device *adev, bool enable) > > { > > int ret = 0; > > > > + if (adev->family == AMDGPU_FAMILY_SI) { > > + mutex_lock(&adev->pm.mutex); > > + if (enable) { > > + adev->pm.dpm.vce_active = true; > > + /* XXX select vce level based on ring/task */ > > + adev->pm.dpm.vce_level = > AMD_VCE_LEVEL_AC_ALL; > > + } else { > > + adev->pm.dpm.vce_active = false; > > + } > > + mutex_unlock(&adev->pm.mutex); > > + > > + amdgpu_dpm_compute_clocks(adev); > > + return; > > + } > > + > > ret = amdgpu_dpm_set_powergating_by_smu(adev, > AMD_IP_BLOCK_TYPE_VCE, !enable); > > if (ret) > > DRM_ERROR("Dpm %s vce failed, ret = %d. 
\n", > > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > index 9613c6181c17..d3fe149d8476 100644 > > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c > > @@ -1028,16 +1028,6 @@ static int > amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) > > void amdgpu_legacy_dpm_compute_clocks(void *handle) > > { > > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - int i = 0; > > - > > - if (adev->mode_info.num_crtc) > > - amdgpu_display_bandwidth_update(adev); > > - > > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > - struct amdgpu_ring *ring = adev->rings[i]; > > - if (ring && ring->sched.ready) > > - amdgpu_fence_wait_empty(ring); > > - } > > > > amdgpu_dpm_get_active_displays(adev); > > > > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > index caae54487f9c..633dab14f51c 100644 > > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c > > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct > amdgpu_device *adev) > > } > > #endif > > > > -static int si_set_powergating_by_smu(void *handle, > > - uint32_t block_type, > > - bool gate) > > -{ > > - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - > > - switch (block_type) { > > - case AMD_IP_BLOCK_TYPE_UVD: > > - if (!gate) { > > - adev->pm.dpm.uvd_active = true; > > - adev->pm.dpm.state = > POWER_STATE_TYPE_INTERNAL_UVD; > > - } else { > > - adev->pm.dpm.uvd_active = false; > > - } > > - > > - amdgpu_legacy_dpm_compute_clocks(handle); > > - break; > > - case AMD_IP_BLOCK_TYPE_VCE: > > - if (!gate) { > > - adev->pm.dpm.vce_active = true; > > - /* XXX select vce level based on ring/task */ > > - adev->pm.dpm.vce_level = > AMD_VCE_LEVEL_AC_ALL; > > - } else { > > - adev->pm.dpm.vce_active = false; > > - } > > - > > - amdgpu_legacy_dpm_compute_clocks(handle); > > - break; > > - default: > > - break; > > - } > > - return 0; > > -} > > - > > static int si_set_sw_state(struct amdgpu_device *adev) > > { > > return (amdgpu_si_send_msg_to_smc(adev, > PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? 
> > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs > = { > > .print_power_state = &si_dpm_print_power_state, > > .debugfs_print_current_performance_level = > &si_dpm_debugfs_print_current_performance_level, > > .force_performance_level = &si_dpm_force_performance_level, > > - .set_powergating_by_smu = &si_set_powergating_by_smu, > > .vblank_too_short = &si_dpm_vblank_too_short, > > .set_fan_control_mode = &si_dpm_set_fan_control_mode, > > .get_fan_control_mode = &si_dpm_get_fan_control_mode, > > diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > index dbed72c1e0c6..1eb4e613b27a 100644 > > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c > > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void > *handle) > > { > > struct pp_hwmgr *hwmgr = handle; > > struct amdgpu_device *adev = hwmgr->adev; > > - int i = 0; > > - > > - if (adev->mode_info.num_crtc) > > - amdgpu_display_bandwidth_update(adev); > > - > > - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { > > - struct amdgpu_ring *ring = adev->rings[i]; > > - if (ring && ring->sched.ready) > > - amdgpu_fence_wait_empty(ring); > > - } > > > > if (!amdgpu_device_has_dc_support(adev)) { > > amdgpu_dpm_get_active_displays(adev); ^ permalink raw reply [flat|nested] 9+ messages in thread
Thread overview: 9+ messages
  2022-04-11  8:54 [PATCH V2] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan
  2022-04-11 10:06 ` Lazar, Lijo
  2022-04-22  8:27 ` Quan, Evan
  2022-04-22  8:36 ` Lazar, Lijo
  2022-04-22  8:53 ` Quan, Evan
  2022-04-22  9:25 ` Lazar, Lijo
  2022-04-22  9:43 ` Quan, Evan
  2022-04-21  8:29 ` Thorsten Leemhuis
  2022-04-22  8:16 ` Quan, Evan